1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2019 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
7 // This file is part of systemtap, and is free software. You can
8 // redistribute it and/or modify it under the terms of the GNU General
9 // Public License (GPL); either version 2, or (at your option) any
17 #include "stringtable.h"
31 #include <unordered_set>
44 bool ate_comment
; // current token follows a comment
45 bool ate_whitespace
; // the most recent token followed whitespace
46 bool saw_tokens
; // the lexer found tokens (before preprocessing occurred)
47 bool check_compatible
; // whether to gate features on session.compatible
50 lexer (istream
&, const string
&, systemtap_session
&, bool);
51 void set_current_file (stapfile
* f
);
52 void set_current_token_chain (const token
* tok
);
53 inline bool has_version (const char* v
) const;
55 unordered_set
<interned_string
> keywords
;
56 static unordered_set
<string
> atwords
;
58 inline int input_get ();
59 inline int input_peek (unsigned n
=0);
60 void input_put (const string
&, const token
*);
62 string input_contents
; // NB: being a temporary, no need to interned_string optimize this object
63 const char *input_pointer
; // index into input_contents; NB: recompute if input_contents changed!
64 const char *input_end
;
65 unsigned cursor_suspend_count
;
66 unsigned cursor_suspend_line
;
67 unsigned cursor_suspend_column
;
69 unsigned cursor_column
;
70 systemtap_session
& session
;
71 stapfile
* current_file
;
72 const token
* current_token_chain
;
79 parser (systemtap_session
& s
, const string
& n
, istream
& i
, unsigned flags
=0);
83 probe
* parse_synthetic_probe (const token
* chain
);
84 stapfile
* parse_library_macros ();
95 struct pp1_activation
;
97 struct pp_macrodecl
: public macrodecl
{
98 pp1_activation
* parent_act
; // used for param bindings
99 virtual bool is_closure() { return parent_act
!= 0; }
100 pp_macrodecl () : macrodecl(), parent_act(0) { }
103 systemtap_session
& session
;
106 bool errs_as_warnings
;
110 parse_context context
;
112 // preprocessing subordinate, first pass (macros)
113 struct pp1_activation
{
115 unsigned cursor
; // position within macro body
116 map
<string
, pp_macrodecl
*> params
;
118 macrodecl
* curr_macro
;
120 pp1_activation (const token
* tok
, macrodecl
* curr_macro
)
121 : tok(tok
), cursor(0), curr_macro(curr_macro
) { }
125 map
<string
, macrodecl
*> pp1_namespace
;
126 vector
<pp1_activation
*> pp1_state
;
127 const token
* next_pp1 ();
128 const token
* scan_pp1 (bool ignore_macros
);
129 const token
* slurp_pp1_param (vector
<const token
*>& param
);
130 const token
* slurp_pp1_body (vector
<const token
*>& body
);
132 // preprocessing subordinate, final pass (conditionals)
133 vector
<pair
<const token
*, pp_state_t
> > pp_state
;
134 const token
* scan_pp ();
135 const token
* skip_pp ();
138 const token
* next ();
139 const token
* peek ();
141 // Advance past and throw away current token after peek () or next ().
144 const token
* systemtap_v_seen
;
145 const token
* last_t
; // the last value returned by peek() or next()
146 const token
* next_t
; // lookahead token
148 // expectations, these swallow the token
149 void expect_known (token_type tt
, string
const & expected
);
150 void expect_unknown (token_type tt
, interned_string
& target
);
151 void expect_unknown2 (token_type tt1
, token_type tt2
, interned_string
& target
);
153 // convenience forms, these also swallow the token
154 void expect_op (string
const & expected
);
155 interned_string
expect_op_any (initializer_list
<const char*> expected
);
156 void expect_kw (string
const & expected
);
157 void expect_number (int64_t & expected
);
158 void expect_ident_or_keyword (interned_string
& target
);
160 // convenience forms, which return true or false, these don't swallow token
161 bool peek_op (string
const & op
);
162 bool peek_kw (string
const & kw
);
164 // convenience forms, which return the token
165 const token
* expect_kw_token (string
const & expected
);
166 const token
* expect_ident_or_atword (interned_string
& target
);
168 void print_error (const parse_error
& pe
, bool errs_as_warnings
= false);
171 private: // nonterminals
172 void parse_probe (vector
<probe
*>&, vector
<probe_alias
*>&);
173 void parse_private (vector
<vardecl
*>&, vector
<probe
*>&,
174 string
const&, vector
<functiondecl
*>&);
175 void parse_global (vector
<vardecl
*>&, vector
<probe
*>&,
177 void do_parse_global (vector
<vardecl
*>&, vector
<probe
*>&,
178 string
const&, const token
*, bool);
179 void parse_functiondecl (vector
<functiondecl
*>&, string
const&);
180 void do_parse_functiondecl (vector
<functiondecl
*>&, const token
*,
181 string
const&, bool);
182 embeddedcode
* parse_embeddedcode ();
183 vector
<probe_point
*> parse_probe_points ();
184 vector
<probe_point
*> parse_components ();
185 vector
<probe_point
*> parse_component ();
186 literal_string
* consume_string_literals (const token
*);
187 literal_string
* parse_literal_string ();
188 literal
* parse_literal ();
189 block
* parse_stmt_block ();
190 try_block
* parse_try_block ();
191 statement
* parse_statement ();
192 if_statement
* parse_if_statement ();
193 for_loop
* parse_for_loop ();
194 for_loop
* parse_while_loop ();
195 foreach_loop
* parse_foreach_loop ();
196 expr_statement
* parse_expr_statement ();
197 return_statement
* parse_return_statement ();
198 delete_statement
* parse_delete_statement ();
199 next_statement
* parse_next_statement ();
200 break_statement
* parse_break_statement ();
201 continue_statement
* parse_continue_statement ();
202 indexable
* parse_indexable ();
203 const token
*parse_hist_op_or_bare_name (hist_op
*&hop
, interned_string
&name
);
204 target_symbol
*parse_target_symbol ();
205 cast_op
*parse_cast_op ();
206 atvar_op
*parse_atvar_op ();
207 expression
* parse_entry_op (const token
* t
);
208 expression
* parse_defined_op (const token
* t
);
209 expression
* parse_const_op (const token
* t
);
210 expression
* parse_perf_op (const token
* t
);
211 expression
* parse_target_register (const token
* t
);
212 expression
* parse_target_deref (const token
* t
);
213 expression
* parse_expression ();
214 expression
* parse_assignment ();
215 expression
* parse_ternary ();
216 expression
* parse_logical_or ();
217 expression
* parse_logical_and ();
218 expression
* parse_boolean_or ();
219 expression
* parse_boolean_xor ();
220 expression
* parse_boolean_and ();
221 expression
* parse_array_in ();
222 expression
* parse_comparison_or_regex_query ();
223 expression
* parse_shift ();
224 expression
* parse_concatenation ();
225 expression
* parse_additive ();
226 expression
* parse_multiplicative ();
227 expression
* parse_unary ();
228 expression
* parse_crement ();
229 expression
* parse_dwarf_value ();
230 expression
* parse_value ();
231 expression
* parse_symbol ();
233 bool peek_target_symbol_components ();
234 void parse_target_symbol_components (target_symbol
* e
);
238 // ------------------------------------------------------------------------
241 parse (systemtap_session
& s
, const string
& n
, istream
& i
, unsigned flags
)
243 parser
p (s
, n
, i
, flags
);
248 parse (systemtap_session
& s
, const string
& name
, unsigned flags
)
250 ifstream
i(name
.c_str(), ios::in
);
253 cerr
<< (file_exists(name
)
254 ? _F("Input file '%s' can't be opened for reading.", name
.c_str())
255 : _F("Input file '%s' is missing.", name
.c_str()))
260 parser
p (s
, name
, i
, flags
);
265 parse_library_macros (systemtap_session
& s
, const string
& name
)
267 ifstream
i(name
.c_str(), ios::in
);
270 cerr
<< (file_exists(name
)
271 ? _F("Input file '%s' can't be opened for reading.", name
.c_str())
272 : _F("Input file '%s' is missing.", name
.c_str()))
277 parser
p (s
, name
, i
);
278 return p
.parse_library_macros ();
282 parse_synthetic_probe (systemtap_session
&s
, istream
& i
, const token
* tok
)
284 parser
p (s
, tok
? tok
->location
.file
->name
: "<synthetic>", i
);
285 return p
.parse_synthetic_probe (tok
);
288 // ------------------------------------------------------------------------
290 parser::parser (systemtap_session
& s
, const string
&n
, istream
& i
, unsigned flags
):
291 session (s
), input_name (n
), input (i
, input_name
, s
, !(flags
& pf_no_compatible
)),
292 errs_as_warnings(flags
& pf_squash_errors
), privileged (flags
& pf_guru
),
293 user_file (flags
& pf_user_file
), auto_path (flags
& pf_auto_path
),
294 context(con_unknown
), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
// Translate a token_type enumerator into a short human-readable name.
// The result is spliced into "expected ..." diagnostics by
// expect_unknown() and expect_unknown2().  Any value without a case
// label here yields "unknown token".
303 tt2str(token_type tt
)
307 case tok_junk
: return "junk";
308 case tok_identifier
: return "identifier";
309 case tok_operator
: return "operator";
310 case tok_string
: return "string";
311 case tok_number
: return "number";
312 case tok_embedded
: return "embedded-code";
313 case tok_keyword
: return "keyword";
// Fallback for enumerators not handled above.
315 return "unknown token";
319 operator << (ostream
& o
, const source_loc
& loc
)
321 o
<< loc
.file
->name
<< ":"
329 operator << (ostream
& o
, const token
& t
)
333 if (t
.type
!= tok_embedded
&& t
.type
!= tok_keyword
) // XXX: other types?
336 for (unsigned i
=0; i
<t
.content
.length(); i
++)
338 char c
= t
.content
[i
];
339 o
<< (isprint (c
) ? c
: '?');
// Report a parse error by forwarding it to the session.
//   pe               - the error to report
//   errs_as_warnings - passed straight through to session.print_error()
// The token handed to the session is the one attached to the error; if
// the error carries none, last_t (the last value returned by peek() or
// next()) is used instead.
352 parser::print_error (const parse_error
&pe
, bool errs_as_warnings
)
// Prefer the error's own token, fall back to the parser's last token.
354 const token
*tok
= pe
.tok
? pe
.tok
: last_t
;
355 session
.print_error(pe
, tok
, input_name
, errs_as_warnings
);
// Evaluate "lhs OP rhs", where OP is spelled by the operator token 'op'.
//   OPERAND - any type providing <=, >=, <, >, == and != (this file
//             instantiates it with int64_t and with string operands)
//   op      - must be a tok_operator whose content is one of
//             "<=", ">=", "<", ">", "==", "!="
// Returns the boolean result of the comparison.
// Throws a PARSE_ERROR located at 'op' when the token is not one of the
// six comparison operators listed above.
362 template <typename OPERAND
>
363 bool eval_comparison (const OPERAND
& lhs
, const token
* op
, const OPERAND
& rhs
)
365 if (op
->type
== tok_operator
&& op
->content
== "<=")
366 { return lhs
<= rhs
; }
367 else if (op
->type
== tok_operator
&& op
->content
== ">=")
368 { return lhs
>= rhs
; }
369 else if (op
->type
== tok_operator
&& op
->content
== "<")
370 { return lhs
< rhs
; }
371 else if (op
->type
== tok_operator
&& op
->content
== ">")
372 { return lhs
> rhs
; }
373 else if (op
->type
== tok_operator
&& op
->content
== "==")
374 { return lhs
== rhs
; }
375 else if (op
->type
== tok_operator
&& op
->content
== "!=")
376 { return lhs
!= rhs
; }
// Anything else is not a comparison operator.
378 throw PARSE_ERROR (_("expected comparison operator"), op
);
382 // Here, we perform on-the-fly preprocessing in two passes.
384 // First pass - macro declaration and expansion.
386 // The basic form of a declaration is @define SIGNATURE %( BODY %)
387 // where SIGNATURE is of the form macro_name (a, b, c, ...)
388 // and BODY can obtain the parameter contents as @a, @b, @c, ....
389 // Note that parameterless macros can also be declared.
391 // Macro definitions may not be nested.
392 // A macro is available textually after it has been defined.
394 // The basic form of a macro invocation
395 // for a parameterless macro is @macro_name,
396 // for a macro with parameters is @macro_name(param_1, param_2, ...).
398 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
399 // leaves its 'parameters' alone, rather than consuming them to result
401 // in a "too many parameters" error. This may be useful in the unusual
401 // case of wanting @foo to expand to the name of a function.
403 // Invocations of unknown macros are left unexpanded, to allow
404 // the continued use of constructs such as @cast, @var, etc.
406 macrodecl::~macrodecl ()
409 for (vector
<const token
*>::iterator it
= body
.begin();
410 it
!= body
.end(); it
++)
414 parser::pp1_activation::~pp1_activation ()
417 if (curr_macro
->is_closure()) return; // body is shared with an earlier declaration
418 for (map
<string
, pp_macrodecl
*>::iterator it
= params
.begin();
419 it
!= params
.end(); it
++)
423 // Grab a token from the current input source (main file or macro body):
427 if (pp1_state
.empty())
428 return input
.scan ();
430 // otherwise, we're inside a macro
431 pp1_activation
* act
= pp1_state
.back();
432 unsigned& cursor
= act
->cursor
;
433 if (cursor
< act
->curr_macro
->body
.size())
435 token
* t
= new token(*act
->curr_macro
->body
[cursor
]);
436 t
->chain
= new token(*act
->tok
); // mark chained token
441 return 0; // reached end of macro body
445 parser::scan_pp1 (bool ignore_macros
= false)
449 const token
* t
= next_pp1 ();
450 if (t
== 0) // EOF or end of macro body
452 if (pp1_state
.empty()) // actual EOF
455 // Exit macro and loop around to look for the next token.
456 pp1_activation
* act
= pp1_state
.back();
457 pp1_state
.pop_back(); delete act
;
462 // PR18462 don't catalog preprocessor-disabled macros
463 if (t
->type
== tok_operator
&& t
->content
== "@define" && !ignore_macros
)
465 if (!pp1_state
.empty())
466 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t
);
469 // handle macro definition
470 // (1) consume macro signature
472 if (! (t
&& t
->type
== tok_identifier
))
473 throw PARSE_ERROR (_("expected identifier"), t
);
474 string name
= t
->content
;
476 // check for redefinition of existing macro
477 if (pp1_namespace
.find(name
) != pp1_namespace
.end())
479 parse_error
er (ERR_SRC
, _F("attempt to redefine macro '@%s' in the same file", name
.c_str ()), t
);
481 // Also point to pp1_namespace[name]->tok, the site of
482 // the original definition:
483 er
.chain
= new PARSE_ERROR (_F("macro '@%s' first defined here",
484 name
.c_str()), pp1_namespace
[name
]->tok
);
488 // XXX: the above restriction was mostly necessary due to
489 // wanting to leave open the possibility of
490 // statically-scoped semantics in the future.
492 // XXX: this cascades into further parse errors as the
493 // parser tries to parse the remaining definition... (e.g.
494 // it can't tell that the macro body isn't a conditional,
495 // that the uses of parameters aren't nonexistent
497 if (name
== "define")
498 throw PARSE_ERROR (_("attempt to redefine '@define'"), t
);
499 if (input
.atwords
.count(name
))
500 session
.print_warning (_F("macro redefines built-in operator '@%s'", name
.c_str()), t
);
502 macrodecl
* decl
= (pp1_namespace
[name
] = new macrodecl
);
505 // determine if the macro takes parameters
506 bool saw_params
= false;
508 if (t
&& t
->type
== tok_operator
&& t
->content
== "(")
516 if (! (t
&& t
->type
== tok_identifier
))
517 throw PARSE_ERROR(_("expected identifier"), t
);
518 decl
->formal_args
.push_back(t
->content
);
522 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
526 else if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
534 throw PARSE_ERROR (_("expected ',' or ')'"), t
);
540 // (2) identify & consume macro body
541 if (! (t
&& t
->type
== tok_operator
&& t
->content
== "%("))
544 throw PARSE_ERROR (_("expected '%('"), t
);
546 throw PARSE_ERROR (_("expected '%(' or '('"), t
);
550 t
= slurp_pp1_body (decl
->body
);
552 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl
->tok
);
555 // Now loop around to look for a real token.
559 // (potential) macro invocation
560 if (t
->type
== tok_operator
&& t
->content
[0] == '@')
562 const string
& name
= t
->content
.substr(1); // strip initial '@'
564 // check if name refers to a real parameter or macro
566 pp1_activation
* act
= pp1_state
.empty() ? 0 : pp1_state
.back();
567 if (act
&& act
->params
.find(name
) != act
->params
.end())
568 decl
= act
->params
[name
];
569 else if (!(act
&& act
->curr_macro
->context
== ctx_library
)
570 && pp1_namespace
.find(name
) != pp1_namespace
.end())
571 decl
= pp1_namespace
[name
];
572 else if (session
.library_macros
.find(name
)
573 != session
.library_macros
.end())
574 decl
= session
.library_macros
[name
];
575 else // this is an ordinary @operator
578 // handle macro invocation, taking ownership of t
579 pp1_activation
*new_act
= new pp1_activation(t
, decl
);
580 unsigned num_params
= decl
->formal_args
.size();
582 // (1a) restore parameter invocation closure
583 if (num_params
== 0 && decl
->is_closure())
585 // NB: decl->parent_act is always safe since the
586 // parameter decl (if any) comes from an activation
587 // record which is deeper in the stack than new_act.
589 // decl is a macro parameter which must be evaluated in
590 // the context of the original point of invocation:
591 new_act
->params
= ((pp_macrodecl
*)decl
)->parent_act
->params
;
595 // (1b) consume macro parameters (if any)
599 // for simplicity, we do not allow macro constructs here
600 // -- if we did, we'd have to recursively call scan_pp1()
602 if (! (t
&& t
->type
== tok_operator
&& t
->content
== "("))
605 throw PARSE_ERROR (_NF
606 ("expected '(' in invocation of macro '@%s'"
607 " taking %d parameter",
608 "expected '(' in invocation of macro '@%s'"
609 " taking %d parameters",
610 num_params
, name
.c_str(), num_params
), t
);
613 // XXX perhaps parse/count the full number of params,
614 // so we can say "expected x, found y params" on error?
615 for (unsigned i
= 0; i
< num_params
; i
++)
619 // create parameter closure
620 string param_name
= decl
->formal_args
[i
];
621 pp_macrodecl
* p
= (new_act
->params
[param_name
]
623 p
->tok
= new token(*new_act
->tok
);
625 // NB: *new_act->tok points to invocation, act is NULL at top level
627 t
= slurp_pp1_param (p
->body
);
629 // check correct usage of ',' or ')'
630 if (t
== 0) // hit unexpected EOF or end of macro
632 // XXX could we pop the stack and continue parsing
633 // the invocation, allowing macros to construct new
634 // invocations in piecemeal fashion??
635 const token
* orig_t
= new token(*new_act
->tok
);
637 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t
);
639 if (t
->type
== tok_operator
&& t
->content
== ",")
641 if (i
+ 1 == num_params
)
644 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name
.c_str(), num_params
), t
);
647 else if (t
->type
== tok_operator
&& t
->content
== ")")
649 if (i
+ 1 != num_params
)
652 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name
.c_str(), num_params
), t
);
657 // XXX this is, incidentally, impossible
659 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t
);
665 // (2) set up macro expansion
667 pp1_state
.push_back (new_act
);
669 // Now loop around to look for a real token.
673 // Otherwise, we have an ordinary token.
678 // Consume a single parameter of a macro invocation, heeding nested
679 // brackets and stopping on an unbalanced ')' or an unbracketed ','
680 // (and returning the final separator token).
//   param - output vector for the parameter's tokens (the caller passes
//           the body of the parameter's pp_macrodecl)
// Returns the terminating ")" or "," token, or NULL; the caller treats
// NULL as an unexpected EOF / end of macro.
682 parser::slurp_pp1_param (vector
<const token
*>& param
)
// Depth of currently open '(' / '[' brackets inside the parameter.
685 unsigned nesting
= 0;
692 // '[' is needed in case a macro parameter is used as prefix for an array-deref operation
// Case 1: an opening bracket.
693 if (t
->type
== tok_operator
&& (t
->content
== "(" || t
->content
== "["))
// Case 2: a closing bracket while inside nested brackets.
695 else if (nesting
&& t
->type
== tok_operator
&& (t
->content
== ")" || t
->content
== "]"))
// Case 3: an unbalanced ')' or a top-level ',' -- the parameter's separator.
697 else if (!nesting
&& t
->type
== tok_operator
698 && (t
->content
== ")" || t
->content
== ","))
703 return t
; // report ")" or "," or NULL
707 // Consume a macro declaration's body, heeding nested %( %) brackets.
//   body - output vector for the body's tokens (the caller passes the
//          body of the macrodecl being defined)
// Returns the final "%)" token, or NULL; the caller reports NULL as an
// incomplete macro definition (missing '%)').
709 parser::slurp_pp1_body (vector
<const token
*>& body
)
// Depth of currently open nested "%(" brackets inside the body.
712 unsigned nesting
= 0;
// Case 1: a nested "%(" opener.
719 if (t
->type
== tok_operator
&& t
->content
== "%(")
// Case 2: a "%)" that closes a nested bracket.
721 else if (nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
// Case 3: a "%)" at depth zero -- the end of the macro body.
723 else if (!nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
728 return t
; // report final "%)" or NULL
731 // Used for parsing .stpm files.
733 parser::parse_library_macros ()
735 stapfile
* f
= new stapfile
;
736 f
->privileged
= this->privileged
;
737 input
.set_current_file (f
);
741 const token
* t
= scan_pp ();
743 // Currently we only take objection to macro invocations if they
744 // produce a non-whitespace token after being expanded.
746 // XXX should we prevent macro invocations even if they expand to empty??
749 throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name
.c_str()), t
);
751 // We need to first check whether *any* of the macros are duplicates,
752 // then commit to including the entire file in the global namespace
754 for (map
<string
, macrodecl
*>::iterator it
= pp1_namespace
.begin();
755 it
!= pp1_namespace
.end(); it
++)
757 string name
= it
->first
;
759 if (session
.library_macros
.find(name
) != session
.library_macros
.end())
761 parse_error
er(ERR_SRC
, _F("duplicate definition of library macro '@%s'", name
.c_str()), it
->second
->tok
);
762 er
.chain
= new PARSE_ERROR (_F("macro '@%s' first defined here", name
.c_str()), session
.library_macros
[name
]->tok
);
772 catch (const parse_error
& pe
)
774 print_error (pe
, errs_as_warnings
);
779 // If no errors, include the entire file. Note how this is outside
780 // of the try-catch block -- no errors possible.
781 for (map
<string
, macrodecl
*>::iterator it
= pp1_namespace
.begin();
782 it
!= pp1_namespace
.end(); it
++)
784 string name
= it
->first
;
786 session
.library_macros
[name
] = it
->second
;
787 session
.library_macros
[name
]->context
= ctx_library
;
793 // Second pass - preprocessor conditional expansion.
795 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
796 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
797 // or: arch COMPARISON-OP "arch-string"
798 // or: systemtap_v COMPARISON-OP "version-string"
799 // or: systemtap_privilege COMPARISON-OP "privilege-string"
800 // or: CONFIG_foo COMPARISON-OP "config-string"
801 // or: CONFIG_foo COMPARISON-OP number
802 // or: CONFIG_foo COMPARISON-OP CONFIG_bar
803 // or: "string1" COMPARISON-OP "string2"
804 // or: number1 COMPARISON-OP number2
805 // The %: ELSE-TOKENS part is optional.
807 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
808 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
809 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
811 // Up to an entire %( ... %) expression is processed by a single call
812 // to this function. Tokens included by any nested conditions are
813 // enqueued in a private vector.
815 bool eval_pp_conditional (systemtap_session
& s
,
816 const token
* l
, const token
* op
, const token
* r
)
818 if (l
->type
== tok_identifier
&& (l
->content
== "kernel_v" ||
819 l
->content
== "kernel_vr" ||
820 l
->content
== "systemtap_v"))
822 if (! (r
->type
== tok_string
))
823 throw PARSE_ERROR (_("expected string literal"), r
);
825 string target_kernel_vr
= s
.kernel_release
;
826 string target_kernel_v
= s
.kernel_base_release
;
829 if (l
->content
== "kernel_v") target
= target_kernel_v
;
830 else if (l
->content
== "kernel_vr") target
= target_kernel_vr
;
831 else if (l
->content
== "systemtap_v") target
= s
.compatible
;
834 string query
= r
->content
;
835 bool rhs_wildcard
= (strpbrk (query
.c_str(), "*?[") != 0);
837 // collect acceptable strverscmp results.
838 int rvc_ok1
, rvc_ok2
;
840 if (op
->type
== tok_operator
&& op
->content
== "<=")
841 { rvc_ok1
= -1; rvc_ok2
= 0; }
842 else if (op
->type
== tok_operator
&& op
->content
== ">=")
843 { rvc_ok1
= 1; rvc_ok2
= 0; }
844 else if (op
->type
== tok_operator
&& op
->content
== "<")
845 { rvc_ok1
= -1; rvc_ok2
= -1; }
846 else if (op
->type
== tok_operator
&& op
->content
== ">")
847 { rvc_ok1
= 1; rvc_ok2
= 1; }
848 else if (op
->type
== tok_operator
&& op
->content
== "==")
849 { rvc_ok1
= 0; rvc_ok2
= 0; wc_ok
= true; }
850 else if (op
->type
== tok_operator
&& op
->content
== "!=")
851 { rvc_ok1
= -1; rvc_ok2
= 1; wc_ok
= true; }
853 throw PARSE_ERROR (_("expected comparison operator"), op
);
855 if ((!wc_ok
) && rhs_wildcard
)
856 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op
);
860 int rvc_result
= fnmatch (query
.c_str(), target
.c_str(),
861 FNM_NOESCAPE
); // spooky
862 bool badness
= (rvc_result
== 0) ^ (op
->content
== "==");
867 int rvc_result
= strverscmp (target
.c_str(), query
.c_str());
868 // normalize rvc_result
869 if (rvc_result
< 0) rvc_result
= -1;
870 if (rvc_result
> 0) rvc_result
= 1;
871 return (rvc_result
== rvc_ok1
|| rvc_result
== rvc_ok2
);
874 else if (l
->type
== tok_identifier
&& l
->content
== "systemtap_privilege")
876 string target_privilege
=
877 pr_contains(s
.privilege
, pr_stapdev
) ? "stapdev"
878 : pr_contains(s
.privilege
, pr_stapsys
) ? "stapsys"
879 : pr_contains(s
.privilege
, pr_stapusr
) ? "stapusr"
880 : "none"; /* should be impossible -- s.privilege always one of above */
881 assert(target_privilege
!= "none");
883 if (! (r
->type
== tok_string
))
884 throw PARSE_ERROR (_("expected string literal"), r
);
885 string query_privilege
= r
->content
;
887 bool nomatch
= (target_privilege
!= query_privilege
);
890 if (op
->type
== tok_operator
&& op
->content
== "==")
892 else if (op
->type
== tok_operator
&& op
->content
== "!=")
895 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
896 /* XXX perhaps allow <= >= and similar comparisons */
900 else if (l
->type
== tok_identifier
&& l
->content
== "guru_mode")
902 if (! (r
->type
== tok_number
))
903 throw PARSE_ERROR (_("expected number"), r
);
904 int64_t lhs
= (int64_t) s
.guru_mode
;
905 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
906 if (!((rhs
== 0)||(rhs
== 1)))
907 throw PARSE_ERROR (_("expected 0 or 1"), op
);
908 if (!((op
->type
== tok_operator
&& op
->content
== "==") ||
909 (op
->type
== tok_operator
&& op
->content
== "!=")))
910 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
912 return eval_comparison (lhs
, op
, rhs
);
914 else if (l
->type
== tok_identifier
&& l
->content
== "arch")
916 string target_architecture
= s
.architecture
;
917 if (! (r
->type
== tok_string
))
918 throw PARSE_ERROR (_("expected string literal"), r
);
919 string query_architecture
= r
->content
;
921 int nomatch
= fnmatch (query_architecture
.c_str(),
922 target_architecture
.c_str(),
923 FNM_NOESCAPE
); // still spooky
926 if (op
->type
== tok_operator
&& op
->content
== "==")
928 else if (op
->type
== tok_operator
&& op
->content
== "!=")
931 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
935 else if (l
->type
== tok_identifier
&& l
->content
== "runtime")
937 if (! (r
->type
== tok_string
))
938 throw PARSE_ERROR (_("expected string literal"), r
);
940 string query_runtime
= r
->content
;
941 string target_runtime
;
943 if (s
.runtime_mode
== systemtap_session::dyninst_runtime
)
944 target_runtime
= "dyninst";
945 else if (s
.runtime_mode
== systemtap_session::bpf_runtime
)
946 target_runtime
= "bpf";
948 target_runtime
= "kernel";
950 int nomatch
= fnmatch (query_runtime
.c_str(),
951 target_runtime
.c_str(),
952 FNM_NOESCAPE
); // still spooky
955 if (op
->type
== tok_operator
&& op
->content
== "==")
957 else if (op
->type
== tok_operator
&& op
->content
== "!=")
960 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
964 else if (l
->type
== tok_identifier
&& l
->content
.starts_with("CONFIG_"))
966 if (r
->type
== tok_string
)
968 string lhs
= s
.kernel_config
[l
->content
]; // may be empty
969 string rhs
= r
->content
;
971 int nomatch
= fnmatch (rhs
.c_str(), lhs
.c_str(), FNM_NOESCAPE
); // still spooky
974 if (op
->type
== tok_operator
&& op
->content
== "==")
976 else if (op
->type
== tok_operator
&& op
->content
== "!=")
979 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
983 else if (r
->type
== tok_number
)
985 const string
& lhs_string
= s
.kernel_config
[l
->content
];
986 const char* startp
= lhs_string
.c_str ();
987 char* endp
= (char*) startp
;
989 int64_t lhs
= (int64_t) strtoll (startp
, & endp
, 0);
990 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0')
991 throw PARSE_ERROR ("Config option value not a number", l
);
993 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
994 return eval_comparison (lhs
, op
, rhs
);
996 else if (r
->type
== tok_identifier
997 && r
->content
.starts_with( "CONFIG_"))
999 // First try to convert both to numbers,
1000 // otherwise treat both as strings.
1001 const string
& lhs_string
= s
.kernel_config
[l
->content
];
1002 const string
& rhs_string
= s
.kernel_config
[r
->content
];
1003 const char* startp
= lhs_string
.c_str ();
1004 char* endp
= (char*) startp
;
1006 int64_t val
= (int64_t) strtoll (startp
, & endp
, 0);
1007 if (errno
!= ERANGE
&& errno
!= EINVAL
&& *endp
== '\0')
1010 startp
= rhs_string
.c_str ();
1011 endp
= (char*) startp
;
1013 int64_t rhs
= (int64_t) strtoll (startp
, & endp
, 0);
1014 if (errno
!= ERANGE
&& errno
!= EINVAL
&& *endp
== '\0')
1015 return eval_comparison (lhs
, op
, rhs
);
1018 return eval_comparison (lhs_string
, op
, rhs_string
);
1021 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r
);
1023 else if (l
->type
== tok_string
&& r
->type
== tok_string
)
1025 string lhs
= l
->content
;
1026 string rhs
= r
->content
;
1027 return eval_comparison (lhs
, op
, rhs
);
1028 // NB: no wildcarding option here
1030 else if (l
->type
== tok_number
&& r
->type
== tok_number
)
1032 int64_t lhs
= lex_cast
<int64_t>(l
->content
);
1033 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
1034 return eval_comparison (lhs
, op
, rhs
);
1035 // NB: no wildcarding option here
1037 else if (l
->type
== tok_string
&& r
->type
== tok_number
1038 && op
->type
== tok_operator
)
1039 throw PARSE_ERROR (_("expected string literal as right value"), r
);
1040 else if (l
->type
== tok_number
&& r
->type
== tok_string
1041 && op
->type
== tok_operator
)
1042 throw PARSE_ERROR (_("expected number literal as right value"), r
);
1045 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1046 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1047 " comparison between strings or integers"), l
);
1051 // Only tokens corresponding to the TRUE statement must be expanded
1057 pp_state_t pp
= PP_NONE
;
1058 if (!pp_state
.empty())
1059 pp
= pp_state
.back().second
;
1062 if (pp
== PP_SKIP_THEN
|| pp
== PP_SKIP_ELSE
)
1071 t
= pp_state
.back().first
;
1072 pp_state
.pop_back(); // so skip_some doesn't keep trying to close this
1073 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1074 throw PARSE_ERROR (_("incomplete conditional at end of file"), t
);
1079 // misplaced preprocessor "then"
1080 if (t
->type
== tok_operator
&& t
->content
== "%?")
1081 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1083 // preprocessor "else"
1084 if (t
->type
== tok_operator
&& t
->content
== "%:")
1087 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1088 if (pp
== PP_KEEP_ELSE
|| pp
== PP_SKIP_ELSE
)
1089 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t
);
1090 // XXX: here and elsewhere, error cascades might be avoided
1091 // by dropping tokens until we reach the closing %)
1093 pp_state
.back().second
= (pp
== PP_KEEP_THEN
) ?
1094 PP_SKIP_ELSE
: PP_KEEP_ELSE
;
1099 // preprocessor close
1100 if (t
->type
== tok_operator
&& t
->content
== "%)")
1103 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1104 delete pp_state
.back().first
;
1105 delete t
; //this is the closing bracket
1106 pp_state
.pop_back();
1110 if (! (t
->type
== tok_operator
&& t
->content
== "%(")) // ordinary token
1113 // We have a %( - it's time to throw a preprocessing party!
1115 bool result
= false;
1116 bool and_result
= true;
1117 const token
*n
= NULL
;
1119 const token
*l
, *op
, *r
;
1123 if (l
== 0 || op
== 0 || r
== 0)
1124 throw PARSE_ERROR (_("incomplete condition after '%('"), t
);
1125 // NB: consider generalizing to consume all tokens until %?, and
1126 // passing that as a vector to an evaluator.
1128 // Do not evaluate the condition if we haven't expanded everything.
1129 // This may occur when having several recursive conditionals.
1130 and_result
&= eval_pp_conditional (session
, l
, op
, r
);
1131 if(l
->content
=="systemtap_v")
1142 if (n
&& n
->type
== tok_operator
&& n
->content
== "&&")
1144 result
|= and_result
;
1146 if (! (n
&& n
->type
== tok_operator
&& n
->content
== "||"))
1151 clog << "PP eval (" << *t << ") == " << result << endl;
1155 if (! (m
&& m
->type
== tok_operator
&& m
->content
== "%?"))
1156 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t
);
1159 pp
= result
? PP_KEEP_THEN
: PP_SKIP_THEN
;
1160 pp_state
.push_back (make_pair (t
, pp
));
1162 // Now loop around to look for a real token.
1167 // Skip over tokens and any errors, heeding
1168 // only nested preprocessor starts and ends.
1173 unsigned nesting
= 0;
1178 t
= scan_pp1 (true);
1180 catch (const parse_error
&e
)
1186 if (t
->type
== tok_operator
&& t
->content
== "%(")
1188 else if (nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
1190 else if (!nesting
&& t
->type
== tok_operator
&&
1191 (t
->content
== "%:" || t
->content
== "%?" || t
->content
== "%)"))
1204 next_t
= scan_pp ();
1206 throw PARSE_ERROR (_("unexpected end-of-file"));
1209 // advance by zeroing next_t
1219 next_t
= scan_pp ();
1221 // don't advance by zeroing next_t
1230 // can only swallow the token last returned by peek() or next().
1231 assert (last_t
!= 0);
1233 // advance by zeroing next_t
1234 last_t
= next_t
= 0;
// Null-safe token test: true only if 't' is non-null, has type 'tt' and
// its content equals 'expected'.
1239 tok_is(token
const * t
, token_type tt
, string
const & expected
)
1241 return t
&& t
->type
== tt
&& t
->content
== expected
;
1246 parser::expect_known (token_type tt
, string
const & expected
)
1248 const token
*t
= next();
1249 if (! (t
&& t
->type
== tt
&& t
->content
== expected
))
1250 throw PARSE_ERROR (_F("expected '%s'", expected
.c_str()));
1251 // NB: PR25174 may require consume_string_literals() someday
1252 swallow (); // We are done with it, content was copied.
1257 parser::expect_unknown (token_type tt
, interned_string
& target
)
1259 const token
*t
= next();
1260 if (!(t
&& t
->type
== tt
))
1261 throw PARSE_ERROR (_("expected ") + tt2str(tt
));
1262 if (t
->type
==tok_string
)
1264 literal_string
*ls
= consume_string_literals (t
);
1270 target
= t
->content
;
1271 swallow (); // We are done with it, content was copied.
1277 parser::expect_unknown2 (token_type tt1
, token_type tt2
, interned_string
& target
)
1279 const token
*t
= next();
1280 if (!(t
&& (t
->type
== tt1
|| t
->type
== tt2
)))
1281 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1
).c_str(), tt2str(tt2
).c_str()));
1282 // NB: PR25174 may require consume_string_literals() someday
1283 target
= t
->content
;
1284 swallow (); // We are done with it, content was copied.
1289 parser::expect_op (string
const & expected
)
1291 expect_known (tok_operator
, expected
);
1295 parser::expect_op_any (initializer_list
<const char*> expected
)
1297 const token
*t
= next();
1298 if (t
&& t
->type
== tok_operator
)
1299 for (auto it
= expected
.begin(); it
!= expected
.end(); ++it
)
1300 if (t
->content
== *it
)
1302 interned_string found
= t
->content
;
1303 swallow (); // We are done with it, content was copied.
1308 for (auto it
= expected
.begin(); it
!= expected
.end(); ++it
)
1310 if (it
!= expected
.begin())
1314 throw PARSE_ERROR (_F("expected one of '%s'", msg
.c_str()));
1318 parser::expect_kw (string
const & expected
)
1320 expect_known (tok_keyword
, expected
);
1324 parser::expect_kw_token (string
const & expected
)
1326 const token
*t
= next();
1327 if (! (t
&& t
->type
== tok_keyword
&& t
->content
== expected
))
1328 throw PARSE_ERROR (_F("expected '%s'", expected
.c_str()));
1333 parser::expect_number (int64_t & value
)
1336 const token
*t
= next();
1337 if (t
->type
== tok_operator
&& t
->content
== "-")
1343 if (!(t
&& t
->type
== tok_number
))
1344 throw PARSE_ERROR (_("expected number"));
1346 const string
& s
= t
->content
;
1347 const char* startp
= s
.c_str ();
1348 char* endp
= (char*) startp
;
1350 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1351 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1352 // since the lexer only gives us positive digit strings, but we'll
1353 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1355 value
= (int64_t) strtoull (startp
, & endp
, 0);
1356 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
1357 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
1358 || (unsigned long long) value
> 18446744073709551615ULL
1359 || value
< -9223372036854775807LL-1)
1360 throw PARSE_ERROR (_("number invalid or out of range"));
1365 swallow (); // We are done with it, content was parsed and copied into value.
1370 parser::expect_ident_or_atword (interned_string
& target
)
1372 const token
*t
= next();
1374 // accept identifiers and operators beginning in '@':
1375 if (!t
|| (t
->type
!= tok_identifier
1376 && (t
->type
!= tok_operator
|| t
->content
[0] != '@')))
1377 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1378 // so the message is accurate, but keep an eye out in the future:
1379 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier
).c_str()));
1381 target
= t
->content
;
1387 parser::expect_ident_or_keyword (interned_string
& target
)
1389 expect_unknown2 (tok_identifier
, tok_keyword
, target
);
1394 parser::peek_op (string
const & op
)
1396 return tok_is (peek(), tok_operator
, op
);
1401 parser::peek_kw (string
const & kw
)
1403 return tok_is (peek(), tok_identifier
, kw
);
1408 lexer::lexer (istream
& input
, const string
& in
, systemtap_session
& s
, bool cc
):
1409 ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc
),
1410 input_name (in
), input_pointer (0), input_end (0), cursor_suspend_count(0),
1411 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1412 cursor_column (1), session(s
), current_file (0), current_token_chain (0)
1414 getline(input
, input_contents
, '\0');
1416 input_pointer
= input_contents
.data();
1417 input_end
= input_contents
.data() + input_contents
.size();
1419 if (keywords
.empty())
1421 // NB: adding new keywords is highly disruptive to the language,
1422 // in particular to existing scripts that could be suddenly
1423 // broken. If done at all, it has to be s.compatible-sensitive,
1424 // and broadly advertised.
1425 keywords
.insert("probe");
1426 keywords
.insert("global");
1427 if (has_version("3.0"))
1428 keywords
.insert("private");
1429 keywords
.insert("function");
1430 keywords
.insert("if");
1431 keywords
.insert("else");
1432 keywords
.insert("for");
1433 keywords
.insert("foreach");
1434 keywords
.insert("in");
1435 keywords
.insert("limit");
1436 keywords
.insert("return");
1437 keywords
.insert("delete");
1438 keywords
.insert("while");
1439 keywords
.insert("break");
1440 keywords
.insert("continue");
1441 keywords
.insert("next");
1442 keywords
.insert("string");
1443 keywords
.insert("long");
1444 keywords
.insert("try");
1445 keywords
.insert("catch");
1448 if (atwords
.empty())
1450 // NB: adding new @words is mildly disruptive to existing
1451 // scripts that define macros with the same name, but not
1452 // really. The user will merely receive a warning that they are
1453 // redefining an existing operator.
1455 // These are inserted without the actual '@', so we can directly check
1456 // proposed macro names without building a string with that prefix.
1457 atwords
.insert("cast");
1458 atwords
.insert("defined");
1459 atwords
.insert("entry");
1460 atwords
.insert("perf");
1461 atwords
.insert("var");
1462 atwords
.insert("avg");
1463 atwords
.insert("count");
1464 atwords
.insert("sum");
1465 atwords
.insert("min");
1466 atwords
.insert("max");
1467 atwords
.insert("hist_linear");
1468 atwords
.insert("hist_log");
1469 if (has_version("3.1"))
1471 atwords
.insert("const");
1472 atwords
.insert("variance");
1474 if (has_version("4.0"))
1476 atwords
.insert("kregister");
1477 atwords
.insert("uregister");
1478 atwords
.insert("kderef");
1479 atwords
.insert("uderef");
1484 unordered_set
<string
> lexer::atwords
;
1487 lexer::set_current_file (stapfile
* f
)
1492 f
->file_contents
= input_contents
;
1493 f
->name
= input_name
;
1498 lexer::set_current_token_chain (const token
* tok
)
1500 current_token_chain
= tok
;
1504 lexer::input_peek (unsigned n
)
1506 if (input_pointer
+ n
>= input_end
)
1508 return (unsigned char)*(input_pointer
+ n
);
1513 lexer::has_version (const char* v
) const
1515 return check_compatible
1516 ? strverscmp(session
.compatible
.c_str(), v
) >= 0
1523 int c
= input_peek();
1524 if (c
< 0) return c
; // EOF
1528 if (cursor_suspend_count
)
1530 // Track effect of input_put: preserve previous cursor/line_column
1531 // until all of its characters are consumed.
1532 if (--cursor_suspend_count
== 0)
1534 cursor_line
= cursor_suspend_line
;
1535 cursor_column
= cursor_suspend_column
;
1540 // update source cursor
1550 // clog << "[" << (char)c << "]";
1556 lexer::input_put (const string
& chars
, const token
* t
)
1558 size_t pos
= input_pointer
- input_contents
.data();
1559 // clog << "[put:" << chars << " @" << pos << "]";
1560 input_contents
.insert (pos
, chars
);
1561 cursor_suspend_count
+= chars
.size();
1562 cursor_suspend_line
= cursor_line
;
1563 cursor_suspend_column
= cursor_column
;
1564 cursor_line
= t
->location
.line
;
1565 cursor_column
= t
->location
.column
;
1566 input_pointer
= input_contents
.data() + pos
;
1567 input_end
= input_contents
.data() + input_contents
.size();
1574 ate_comment
= false; // reset for each new token
1575 ate_whitespace
= false; // reset for each new token
1577 // XXX be very sure to restore old_saw_tokens if we return without a token:
1578 bool old_saw_tokens
= saw_tokens
;
1581 token
* n
= new token
;
1582 string token_str
; // accumulate here instead of by incremental interning
1583 n
->location
.file
= current_file
;
1584 n
->chain
= current_token_chain
;
1587 bool suspended
= (cursor_suspend_count
> 0);
1588 n
->location
.line
= cursor_line
;
1589 n
->location
.column
= cursor_column
;
1591 int c
= input_get();
1592 // clog << "{" << (char)c << (char)c2 << "}";
1596 saw_tokens
= old_saw_tokens
;
1602 ate_whitespace
= true;
1606 int c2
= input_peek ();
1608 // Paste command line arguments as character streams into
1609 // the beginning of a token. $1..$999 go through as raw
1610 // characters; @1..@999 are quoted/escaped as strings.
1611 // $# and @# expand to the number of arguments, similarly
1613 if ((c
== '$' || c
== '@') && (c2
== '#'))
1615 token_str
.push_back (c
);
1616 token_str
.push_back (c2
);
1617 input_get(); // swallow '#'
1621 n
->make_junk(tok_junk_nested_arg
);
1624 size_t num_args
= session
.args
.size ();
1625 input_put ((c
== '$') ? lex_cast (num_args
) : lex_cast_qstring (num_args
), n
);
1629 else if ((c
== '$' || c
== '@') && (isdigit (c2
)))
1632 token_str
.push_back (c
);
1636 token_str
.push_back (c2
);
1637 idx
= (idx
* 10) + (c2
- '0');
1641 idx
<= session
.args
.size()); // prevent overflow
1644 n
->make_junk(tok_junk_nested_arg
);
1648 idx
-1 >= session
.args
.size())
1650 n
->make_junk(tok_junk_invalid_arg
);
1653 session
.used_args
[idx
-1] = true;
1654 const string
& arg
= session
.args
[idx
-1];
1655 input_put ((c
== '$') ? arg
: lex_cast_qstring (arg
), n
);
1660 else if (isalpha (c
) || c
== '$' || c
== '@' || c
== '_')
1662 token_str
= (char) c
;
1663 while (isalnum (c2
) || c2
== '_' || c2
== '$')
1666 token_str
.push_back (c2
);
1669 n
->content
= token_str
;
1671 if (n
->content
[0] == '@')
1672 // makes it easier to detect illegal use of @words:
1673 n
->type
= tok_operator
;
1674 else if (keywords
.count(n
->content
))
1675 n
->type
= tok_keyword
;
1677 n
->type
= tok_identifier
;
1682 else if (isdigit (c
)) // positive literal
1684 n
->type
= tok_number
;
1685 token_str
= (char) c
;
1687 while (isalnum (c2
))
1689 // NB: isalnum is very permissive. We rely on strtol, called in
1690 // parser::parse_literal below, to confirm that the number string
1691 // is correctly formatted and in range.
1694 token_str
.push_back (c2
);
1698 n
->content
= token_str
;
1704 n
->type
= tok_string
;
1709 if (c
< 0 || c
== '\n')
1711 n
->make_junk(tok_junk_unclosed_quote
);
1714 if (c
== '\"') // closing double-quotes
1716 else if (c
== '\\') // see also input_put
1722 if (!has_version("2.3"))
1732 case '0' ... '7': // NB: need only match the first digit
1734 // Pass these escapes through to the string value
1735 // being parsed; it will be emitted into a C literal.
1736 // XXX: PR13371: perhaps we should evaluate them here
1737 // (and re-quote them during translate.cxx emission).
1738 token_str
.push_back ('\\');
1741 default: the_default
:
1742 token_str
.push_back (c
);
1747 token_str
.push_back (c
);
1749 n
->content
= token_str
;
1753 else if (ispunct (c
))
1755 int c3
= input_peek (1);
1757 // NB: if we were to recognize negative numeric literals here,
1758 // we'd introduce another grammar ambiguity:
1759 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1760 // instead of tok_number(1) tok_operator('-') tok_number(1)
1762 if (c
== '#') // shell comment
1764 unsigned this_line
= cursor_line
;
1765 do { c
= input_get (); }
1766 while (c
>= 0 && cursor_line
== this_line
);
1768 ate_whitespace
= true;
1771 else if ((c
== '/' && c2
== '/')) // C++ comment
1773 unsigned this_line
= cursor_line
;
1774 do { c
= input_get (); }
1775 while (c
>= 0 && cursor_line
== this_line
);
1777 ate_whitespace
= true;
1780 else if (c
== '/' && c2
== '*') // C comment
1782 (void) input_get (); // swallow '*' already in c2
1787 if (c
== '*' && c2
== '/')
1793 ate_whitespace
= true;
1796 else if (c
== '%' && c2
== '{') // embedded code
1798 n
->type
= tok_embedded
;
1799 (void) input_get (); // swallow '{' already in c2
1804 if (c
== '%' && c2
== '}')
1806 n
->content
= token_str
;
1809 if (c
== '}' && c2
== '%') // possible typo
1810 session
.print_warning (_("possible erroneous closing '}%', use '%}'?"), n
);
1811 token_str
.push_back (c
);
1816 n
->make_junk(tok_junk_unclosed_embedded
);
1820 // We're committed to recognizing at least the first character
1822 n
->type
= tok_operator
;
1823 token_str
= (char) c
;
1825 // match all valid operators, in decreasing size order
1826 if ((c
== '<' && c2
== '<' && c3
== '<') ||
1827 (c
== '>' && c2
== '>' && c3
== '>') ||
1828 (c
== '<' && c2
== '<' && c3
== '=') ||
1829 (c
== '>' && c2
== '>' && c3
== '='))
1831 token_str
.push_back (c2
);
1832 token_str
.push_back (c3
);
1836 else if ((c
== '=' && c2
== '=') ||
1837 (c
== '!' && c2
== '=') ||
1838 (c
== '<' && c2
== '=') ||
1839 (c
== '>' && c2
== '=') ||
1840 (c
== '=' && c2
== '~') ||
1841 (c
== '!' && c2
== '~') ||
1842 (c
== '+' && c2
== '=') ||
1843 (c
== '-' && c2
== '=') ||
1844 (c
== '*' && c2
== '=') ||
1845 (c
== '/' && c2
== '=') ||
1846 (c
== '%' && c2
== '=') ||
1847 (c
== '&' && c2
== '=') ||
1848 (c
== '^' && c2
== '=') ||
1849 (c
== '|' && c2
== '=') ||
1850 (c
== '.' && c2
== '=') ||
1851 (c
== '&' && c2
== '&') ||
1852 (c
== '|' && c2
== '|') ||
1853 (c
== '+' && c2
== '+') ||
1854 (c
== '-' && c2
== '-') ||
1855 (c
== '-' && c2
== '>') ||
1856 (c
== '<' && c2
== '<') ||
1857 (c
== '>' && c2
== '>') ||
1858 // preprocessor tokens
1859 (c
== '%' && c2
== '(') ||
1860 (c
== '%' && c2
== '?') ||
1861 (c
== '%' && c2
== ':') ||
1862 (c
== '%' && c2
== ')'))
1864 token_str
.push_back (c2
);
1865 input_get (); // swallow other character
1868 n
->content
= token_str
;
1876 s
<< "\\x" << hex
<< setw(2) << setfill('0') << c
;
1877 n
->content
= s
.str();
1878 // signal parser to emit "expected X, found junk" type error
1879 n
->make_junk(tok_junk_unknown
);
1884 // ------------------------------------------------------------------------
1887 token::make_junk (token_junk_type junk
)
1893 // ------------------------------------------------------------------------
1896 token::junk_message(systemtap_session
& session
) const
1900 case tok_junk_nested_arg
:
1901 return _("invalid nested substitution of command line arguments");
1903 case tok_junk_invalid_arg
:
1904 return _F("command line argument out of range [1-%lu]",
1905 (unsigned long) session
.args
.size());
1907 case tok_junk_unclosed_quote
:
1908 return _("Could not find matching closing quote");
1910 case tok_junk_unclosed_embedded
:
1911 return _("Could not find matching '%}' to close embedded function block");
1914 return _("unknown junk token");
1918 // ------------------------------------------------------------------------
1923 stapfile
* f
= new stapfile
;
1924 f
->privileged
= this->privileged
;
1925 input
.set_current_file (f
);
1933 systemtap_v_seen
= 0;
1934 const token
* t
= peek ();
1935 if (! t
) // nice clean EOF, modulo any preprocessing that occurred
1939 if (t
->type
== tok_keyword
&& t
->content
== "probe")
1941 context
= con_probe
;
1942 parse_probe (f
->probes
, f
->aliases
);
1944 else if (t
->type
== tok_keyword
&& t
->content
== "private")
1946 context
= con_unknown
;
1947 parse_private (f
->globals
, f
->probes
, f
->name
, f
->functions
);
1949 else if (t
->type
== tok_keyword
&& t
->content
== "global")
1951 context
= con_global
;
1952 parse_global (f
->globals
, f
->probes
, f
->name
);
1954 else if (t
->type
== tok_keyword
&& t
->content
== "function")
1956 context
= con_function
;
1957 parse_functiondecl (f
->functions
, f
->name
);
1959 else if (t
->type
== tok_embedded
)
1961 context
= con_embedded
;
1962 f
->embeds
.push_back (parse_embeddedcode ());
1966 context
= con_unknown
;
1967 throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
1970 catch (parse_error
& pe
)
1972 print_error (pe
, errs_as_warnings
);
1974 // XXX: do we want tok_junk to be able to force skip_some behaviour?
1975 if (pe
.skip_some
) // for recovery
1976 // Quietly swallow all tokens until the next keyword we can start parsing from.
1981 const token
* t
= peek ();
1984 if (t
->type
== tok_keyword
&& t
->content
== "probe") break;
1985 else if (t
->type
== tok_keyword
&& t
->content
== "private") break;
1986 else if (t
->type
== tok_keyword
&& t
->content
== "global") break;
1987 else if (t
->type
== tok_keyword
&& t
->content
== "function") break;
1988 else if (t
->type
== tok_embedded
) break;
1989 swallow (); // swallow it
1992 catch (parse_error
& pe2
)
1994 // parse error during recovery ... ugh
2000 if (empty
&& user_file
)
2002 // vary message depending on whether file was *actually* empty:
2003 cerr
<< (input
.saw_tokens
2004 ? _F("Input file '%s' is empty after preprocessing.", input_name
.c_str())
2005 : _F("Input file '%s' is empty.", input_name
.c_str()))
2010 else if (num_errors
> 0)
2012 cerr
<< _NF("%d parse error.", "%d parse errors.", num_errors
, num_errors
) << endl
;
2017 input
.set_current_file(0);
2023 parser::parse_synthetic_probe (const token
* chain
)
2026 stapfile
* f
= new stapfile
;
2027 f
->privileged
= this->privileged
;
2028 f
->synthetic
= true;
2029 input
.set_current_file (f
);
2030 input
.set_current_token_chain (chain
);
2034 context
= con_probe
;
2035 parse_probe (f
->probes
, f
->aliases
);
2037 if (f
->probes
.size() != 1 || !f
->aliases
.empty())
2038 throw PARSE_ERROR (_("expected a single synthetic probe"));
2041 catch (parse_error
& pe
)
2043 print_error (pe
, errs_as_warnings
);
2046 // TODO check for unparsed tokens?
2048 input
.set_current_file(0);
2049 input
.set_current_token_chain(0);
2050 p
->synthetic
= true;
2056 parser::parse_probe (vector
<probe
*> & probe_ret
,
2057 vector
<probe_alias
*> & alias_ret
)
2059 const token
* t0
= next ();
2060 if (! (t0
->type
== tok_keyword
&& t0
->content
== "probe"))
2061 throw PARSE_ERROR (_("expected 'probe'"));
2063 vector
<probe_point
*> aliases
;
2064 vector
<probe_point
*> locations
;
2066 int epilogue_alias
= 0;
2070 vector
<probe_point
*> pps
= parse_probe_points();
2072 const token
* t
= peek ();
2073 if (pps
.size() == 1 && t
2074 && t
->type
== tok_operator
&& t
->content
== "=")
2076 if (pps
[0]->optional
|| pps
[0]->sufficient
)
2077 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps
[0]->components
.front()->tok
);
2078 aliases
.push_back(pps
[0]);
2082 else if (pps
.size() == 1 && t
2083 && t
->type
== tok_operator
&& t
->content
== "+=")
2085 if (pps
[0]->optional
|| pps
[0]->sufficient
)
2086 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps
[0]->components
.front()->tok
);
2087 aliases
.push_back(pps
[0]);
2092 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
2094 locations
.insert(locations
.end(), pps
.begin(), pps
.end());
2098 throw PARSE_ERROR (_("expected probe point specifier"));
2101 if (aliases
.empty())
2103 probe
* p
= new probe
;
2105 p
->locations
= locations
;
2106 p
->body
= parse_stmt_block ();
2107 p
->privileged
= privileged
;
2108 p
->systemtap_v_conditional
= systemtap_v_seen
;
2109 probe_ret
.push_back (p
);
2113 probe_alias
* p
= new probe_alias (aliases
);
2115 p
->epilogue_style
= true;
2117 p
->epilogue_style
= false;
2119 p
->locations
= locations
;
2120 p
->body
= parse_stmt_block ();
2121 p
->privileged
= privileged
;
2122 p
->systemtap_v_conditional
= systemtap_v_seen
;
2123 alias_ret
.push_back (p
);
2129 parser::parse_embeddedcode ()
2131 embeddedcode
* e
= new embeddedcode
;
2132 const token
* t
= next ();
2133 if (t
->type
!= tok_embedded
)
2134 throw PARSE_ERROR (_("expected '%{'"));
2137 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2138 false /* don't skip tokens for parse resumption */);
2141 e
->code
= t
->content
;
2147 parser::parse_stmt_block ()
2149 block
* pb
= new block
;
2151 const token
* t
= next ();
2152 if (! (t
->type
== tok_operator
&& t
->content
== "{"))
2153 throw PARSE_ERROR (_("expected '{'"));
2160 if (t
&& t
->type
== tok_operator
&& t
->content
== "}")
2165 pb
->statements
.push_back (parse_statement ());
2173 parser::parse_try_block ()
2175 try_block
* pb
= new try_block
;
2177 pb
->tok
= expect_kw_token ("try");
2178 pb
->try_block
= parse_stmt_block();
2179 expect_kw ("catch");
2181 const token
* t
= peek ();
2182 if (t
!= NULL
&& t
->type
== tok_operator
&& t
->content
== "(")
2184 swallow (); // swallow the '('
2187 if (! (t
->type
== tok_identifier
))
2188 throw PARSE_ERROR (_("expected identifier"));
2189 symbol
* sym
= new symbol
;
2191 sym
->name
= t
->content
;
2192 pb
->catch_error_var
= sym
;
2197 pb
->catch_error_var
= 0;
2199 pb
->catch_block
= parse_stmt_block();
2207 parser::parse_statement ()
2210 const token
* t
= peek ();
2211 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2212 return new null_statement (next ());
2213 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
2214 return parse_stmt_block (); // Don't squash semicolons.
2215 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "try")
2216 return parse_try_block (); // Don't squash semicolons.
2217 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
2218 return parse_if_statement (); // Don't squash semicolons.
2219 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "for")
2220 return parse_for_loop (); // Don't squash semicolons.
2221 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "foreach")
2222 return parse_foreach_loop (); // Don't squash semicolons.
2223 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "while")
2224 return parse_while_loop (); // Don't squash semicolons.
2225 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "return")
2226 ret
= parse_return_statement ();
2227 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "delete")
2228 ret
= parse_delete_statement ();
2229 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "break")
2230 ret
= parse_break_statement ();
2231 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "continue")
2232 ret
= parse_continue_statement ();
2233 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "next")
2234 ret
= parse_next_statement ();
2235 else if (t
&& (t
->type
== tok_operator
|| // expressions are flexible
2236 t
->type
== tok_identifier
||
2237 t
->type
== tok_number
||
2238 t
->type
== tok_string
||
2239 t
->type
== tok_embedded
))
2240 ret
= parse_expr_statement ();
2241 // XXX: consider generally accepting tok_embedded here too
2243 throw PARSE_ERROR (_("expected statement"));
2245 // Squash "empty" trailing colons after any "non-block-like" statement.
2247 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2249 swallow (); // Silently eat trailing ; after statement
2256 parser::parse_private (vector
<vardecl
*>& globals
, vector
<probe
*>& probes
,
2257 string
const & fname
, vector
<functiondecl
*>& functions
)
2259 const token
* t
= next ();
2260 if (! (t
->type
== tok_keyword
&& t
->content
== "private"))
2261 throw PARSE_ERROR (_("expected 'private'"));
2264 if (t
->type
== tok_keyword
&& t
->content
== "function")
2267 context
= con_function
;
2268 do_parse_functiondecl(functions
, t
, fname
, true);
2270 else if (t
->type
== tok_keyword
&& t
->content
== "global")
2273 context
= con_global
;
2275 if (! (t
->type
== tok_identifier
))
2276 throw PARSE_ERROR (_("expected identifier"));
2277 do_parse_global(globals
, probes
, fname
, t
, true);
2279 // The `private <identifier>` is an acceptable shorthand
2280 // for `private global <identifier>` per above.
2281 else if (t
->type
== tok_identifier
)
2283 context
= con_global
;
2284 do_parse_global(globals
, probes
, fname
, t
, true);
2287 throw PARSE_ERROR (_("expected 'function' or identifier"));
2291 parser::parse_global (vector
<vardecl
*>& globals
, vector
<probe
*>& probes
,
2292 string
const & fname
)
2294 const token
* t0
= next ();
2295 if (! (t0
->type
== tok_keyword
&& t0
->content
== "global"))
2296 throw PARSE_ERROR (_("expected 'global' or 'private'"));
2298 do_parse_global(globals
, probes
, fname
, 0, false);
2302 parser::do_parse_global (vector
<vardecl
*>& globals
, vector
<probe
*>&,
2303 string
const & fname
, const token
* t0
, bool priv
)
2309 t
= (iter0
&& priv
) ? t0
: next ();
2311 if (! (t
->type
== tok_identifier
))
2312 throw PARSE_ERROR (_("expected identifier"));
2314 string gname
= "__global_" + string(t
->content
);
2315 string pname
= "__private_" + detox_path(fname
) + string(t
->content
);
2316 string name
= priv
? pname
: gname
;
2318 for (unsigned i
=0; i
<globals
.size(); i
++)
2320 if (globals
[i
]->name
== name
)
2321 throw PARSE_ERROR (_("duplicate global name"));
2322 if ((globals
[i
]->name
== gname
) || (globals
[i
]->name
== pname
))
2323 throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2326 vardecl
* d
= new vardecl
;
2327 d
->unmangled_name
= t
->content
;
2330 d
->systemtap_v_conditional
= systemtap_v_seen
;
2331 globals
.push_back (d
);
2335 if(t
&& t
->type
== tok_operator
&& t
->content
== "%") //wrapping
2342 if (t
&& t
->type
== tok_operator
&& t
->content
== "[") // array size
2346 expect_number(size
);
2347 if (size
<= 0 || size
> INT_MAX
)
2348 throw PARSE_ERROR(_("array size out of range"));
2349 d
->maxsize
= (int)size
;
2350 expect_known(tok_operator
, "]");
2354 if (t
&& t
->type
== tok_operator
&& t
->content
== "=") // initialization
2356 if (!d
->compatible_arity(0))
2357 throw PARSE_ERROR(_("only scalar globals can be initialized"));
2359 next (); // Don't swallow, set_arity() used the peeked token.
2360 d
->init
= parse_literal ();
2361 d
->type
= d
->init
->type
;
2365 if (t
&& t
->type
== tok_operator
&& t
->content
== ";") // termination
2371 if (t
&& t
->type
== tok_operator
&& t
->content
== ",") // next global
2382 parser::parse_functiondecl (vector
<functiondecl
*>& functions
,
2383 string
const & fname
)
2385 const token
* t
= next ();
2386 if (! (t
->type
== tok_keyword
&& t
->content
== "function"))
2387 throw PARSE_ERROR (_("expected 'function'"));
2389 do_parse_functiondecl(functions
, t
, fname
, false);
2393 parser::do_parse_functiondecl (vector
<functiondecl
*>& functions
, const token
* t
,
2394 string
const & fname
, bool priv
)
2397 if (! (t
->type
== tok_identifier
)
2398 && ! (t
->type
== tok_keyword
2399 && (t
->content
== "string" || t
->content
== "long")))
2400 throw PARSE_ERROR (_("expected identifier"));
2402 string gname
= "__global_" + string(t
->content
);
2403 string pname
= "__private_" + detox_path(fname
) + string(t
->content
);
2404 string name
= priv
? pname
: gname
;
2405 name
+= "__overload_" + lex_cast(session
.overload_count
[t
->content
]++);
2407 functiondecl
*fd
= new functiondecl ();
2408 fd
->unmangled_name
= t
->content
;
2413 if (t
->type
== tok_operator
&& t
->content
== ":")
2417 if (t
->type
== tok_keyword
&& t
->content
== "string")
2418 fd
->type
= pe_string
;
2419 else if (t
->type
== tok_keyword
&& t
->content
== "long")
2421 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2427 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2428 throw PARSE_ERROR (_("expected '('"));
2435 // permit zero-argument functions
2436 if (t
->type
== tok_operator
&& t
->content
== ")")
2441 else if (! (t
->type
== tok_identifier
))
2442 throw PARSE_ERROR (_("expected identifier"));
2443 vardecl
* vd
= new vardecl
;
2444 vd
->unmangled_name
= vd
->name
= t
->content
;
2446 for (auto it
= fd
->formal_args
.begin() ; it
!= fd
->formal_args
.end(); ++it
)
2448 string param
= vd
->unmangled_name
;
2449 if ((*it
)->unmangled_name
== param
)
2450 throw PARSE_ERROR(_("duplicate parameter names"));
2454 fd
->formal_args
.push_back (vd
);
2455 fd
->systemtap_v_conditional
= systemtap_v_seen
;
2459 if (t
->type
== tok_operator
&& t
->content
== ":")
2463 if (t
->type
== tok_keyword
&& t
->content
== "string")
2464 vd
->type
= pe_string
;
2465 else if (t
->type
== tok_keyword
&& t
->content
== "long")
2467 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2471 if (t
->type
== tok_operator
&& t
->content
== ")")
2476 if (t
->type
== tok_operator
&& t
->content
== ",")
2482 throw PARSE_ERROR (_("expected ',' or ')'"));
2486 if (t
->type
== tok_operator
&& t
->content
== ":")
2489 literal
* literal
= parse_literal();
2490 literal_number
* ln
= dynamic_cast<literal_number
*>(literal
);
2492 throw PARSE_ERROR (_("expected literal number"));
2493 fd
->priority
= ln
->value
;
2495 // reserve priority 0 for user script implementation
2496 if (fd
->priority
< 1)
2497 throw PARSE_ERROR (_("specified priority must be > 0"));
2502 // allow script file implementation override automatically when
2503 // priority not specified
2508 if (t
&& t
->type
== tok_embedded
)
2509 fd
->body
= parse_embeddedcode ();
2511 fd
->body
= parse_stmt_block ();
2513 functions
.push_back (fd
);
2516 vector
<probe_point
*>
2517 parser::parse_probe_points()
2519 vector
<probe_point
*> pps
;
2522 vector
<probe_point
*> tail
= parse_components();
2523 pps
.insert(pps
.end(), tail
.begin(), tail
.end());
2525 const token
* t
= peek();
2526 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
2532 if (t
&& t
->type
== tok_operator
2533 && (t
->content
== "{" || t
->content
== "=" ||
2534 t
->content
== "+="|| t
->content
== "}"))
2537 throw PARSE_ERROR (_("expected one of ', { } = +='"));
2542 vector
<probe_point
*>
2543 parser::parse_components()
2545 vector
<probe_point
*> pps
;
2548 vector
<probe_point
*> suffix
= parse_component();
2550 // Cartesian product of components
2555 assert(!suffix
.empty());
2556 vector
<probe_point
*> product
;
2557 for (unsigned i
= 0; i
< pps
.size(); i
++)
2559 if (pps
[i
]->optional
|| pps
[i
]->sufficient
|| pps
[i
]->condition
)
2560 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2561 pps
[i
]->components
[0]->tok
);
2562 for (unsigned j
= 0; j
< suffix
.size(); j
++)
2564 probe_point
* pp
= new probe_point
;
2565 pp
->components
.insert(pp
->components
.end(),
2566 pps
[i
]->components
.begin(),
2567 pps
[i
]->components
.end());
2568 pp
->components
.insert(pp
->components
.end(),
2569 suffix
[j
]->components
.begin(),
2570 suffix
[j
]->components
.end());
2571 pp
->optional
= suffix
[j
]->optional
;
2572 pp
->sufficient
= suffix
[j
]->sufficient
;
2574 pp
->auto_path
= suffix
[j
]->auto_path
;
2575 pp
->condition
= suffix
[j
]->condition
;
2576 product
.push_back(pp
);
2579 for (unsigned i
= 0; i
< pps
.size(); i
++) delete pps
[i
];
2580 for (unsigned i
= 0; i
< suffix
.size(); i
++) delete suffix
[i
];
2584 const token
* t
= peek();
2585 if (t
&& t
->type
== tok_operator
&& t
->content
== ".")
2591 // We only fall through here at the end of a probe point (past
2592 // all the dotted/parametrized components).
2594 if (t
&& t
->type
== tok_operator
&&
2595 (t
->content
== "?" || t
->content
== "!"))
2597 for (unsigned i
= 0; i
< pps
.size(); i
++)
2599 if (pps
[i
]->optional
|| pps
[i
]->sufficient
)
2600 throw PARSE_ERROR (_("'?' or '!' respecified"));
2601 pps
[i
]->optional
= true;
2602 if (t
->content
== "!") pps
[i
]->sufficient
= true;
2604 // NB: sufficient implies optional
2610 if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
2614 if (!(t
&& t
->type
== tok_operator
&& t
->content
== "("))
2615 throw PARSE_ERROR (_("expected '('"));
2618 expression
* e
= parse_expression();
2619 for (unsigned i
= 0; i
< pps
.size(); i
++)
2621 if (pps
[i
]->condition
!= 0)
2622 throw PARSE_ERROR (_("condition respecified"));
2623 pps
[i
]->condition
= e
;
2627 if (!(t
&& t
->type
== tok_operator
&& t
->content
== ")"))
2628 throw PARSE_ERROR (_("expected ')'"));
// Parse one probe-point component and produce the probe point(s) for it.
// The leading token must be an identifier, a keyword, or the operator "*"
// or "{"; anything else is a parse error.  A "{" introduces a nested list
// handled by parse_probe_points() that must be closed by "}".  Otherwise
// following identifier / keyword / "*" tokens are glued onto the first
// token's text, a new probe_point holding a single component is created,
// and an optional parenthesized literal is parsed into the component's arg.
// NOTE(review): the return statements and several token fetches are not
// visible in this excerpt -- confirm the details against the full file.
2637 vector
<probe_point
*>
2638 parser::parse_component()
2640 const token
* t
= next ();
2641 if (! (t
->type
== tok_identifier
2642 // we must allow ".return" and ".function", which are keywords
2643 || t
->type
== tok_keyword
2644 // we must allow "*", due to being an operator
2645 || (t
->type
== tok_operator
&& (t
->content
== "*" || t
->content
== "{"))))
2646 throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
// Brace form: a nested list of probe points that must end with '}'.
2648 if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
2651 vector
<probe_point
*> pps
= parse_probe_points();
2653 if (!(t
&& t
->type
== tok_operator
&& t
->content
== "}"))
2654 throw PARSE_ERROR (_("expected '}'"));
2660 // loop which reconstitutes an identifier with wildcards
2661 string content
= t
->content
;
2662 bool changed_p
= false;
2665 const token
* u
= peek();
2668 // ensure pieces of the identifier are adjacent:
2669 if (input
.ate_whitespace
)
2671 // ensure pieces of the identifier are valid:
2672 if (! (u
->type
== tok_identifier
2673 // we must allow arbitrary keywords with a wildcard
2674 || u
->type
== tok_keyword
2675 // we must allow "*", due to being an operator
2676 || (u
->type
== tok_operator
&& u
->content
== "*")))
2680 content
= content
+ (string
)u
->content
;
2689 // We've already swallowed the first token and we're not
2690 // putting it back; no one else has a copy; so we can
2691 // safely overwrite its content and reuse it.
2692 const_cast<token
*>(t
)->content
= content
;
// Wrap the (possibly glued) name in a component owned by a fresh probe_point.
2695 probe_point::component
* c
= new probe_point::component
;
2696 c
->functor
= t
->content
;
2698 vector
<probe_point
*> pps
;
2699 probe_point
* pp
= new probe_point
;
2701 pp
->auto_path
= input_name
;
2702 pp
->components
.push_back(c
);
2704 // NB we may add c->arg soon
2708 // consume optional parameter
2709 if (t
&& t
->type
== tok_operator
&& t
->content
== "(")
2711 swallow (); // consume "("
2712 c
->arg
= parse_literal ();
2715 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2716 throw PARSE_ERROR (_("expected ')'"));
// Build a literal_string from the string token t, then absorb any directly
// following tok_string tokens by appending their content to a running
// buffer (string auto-concatenation).  For scripts that do not satisfy
// has_version("2.0"), absorption stops when the lexer reports that a
// comment was eaten before the next string.
// NOTE(review): the declaration of `concat` and the final store of the
// concatenated text are not visible in this excerpt.
2725 parser::consume_string_literals(const token
*t
)
2727 literal_string
*ls
= new literal_string (t
->content
);
2729 // PR11208: check if the next token is also a string literal;
2730 // auto-concatenate it. This is complicated to the extent that we
2731 // need to skip intermediate whitespace.
2733 // NB for versions prior to 2.0: but don't skip over intervening comments
2735 bool p_concat
= false;
2736 const token
*n
= peek();
2737 while (n
!= NULL
&& n
->type
== tok_string
2738 && ! (!input
.has_version("2.0") && input
.ate_comment
))
// The buffer is seeded from the first token's text.
2742 concat
= t
->content
;
2745 concat
.append(n
->content
.data(), n
->content
.size());
2746 next(); // consume the token
2755 // Parse a string literal and perform backslash escaping on the contents:
// Consumes the next token.  A tok_string token is handed to
// consume_string_literals(); any other token raises
// "expected literal string".
2757 parser::parse_literal_string ()
2759 const token
* t
= next ();
2761 if (t
->type
== tok_string
)
2762 l
= consume_string_literals (t
);
2764 throw PARSE_ERROR (_("expected literal string"));
// Parse a string or numeric literal from the next token.
//  - tok_string: delegated to consume_string_literals().
//  - a "-" operator is recognized ahead of a number; the `neg` flag tested
//    below is presumably set by that branch (not visible here -- confirm).
//  - tok_number: converted with strtoull() using base 0 and wrapped in a
//    literal_number; conversion errors, trailing characters, or a negated
//    magnitude above 2^63 raise "number invalid or out of range".
// Any other token raises "expected literal string or number".
2772 parser::parse_literal ()
2774 const token
* t
= next ();
2776 if (t
->type
== tok_string
)
2778 l
= consume_string_literals (t
);
2783 if (t
->type
== tok_operator
&& t
->content
== "-")
2790 if (t
->type
== tok_number
)
2792 const string
& s
= t
->content
;
2793 const char* startp
= s
.c_str ();
2794 char* endp
= (char*) startp
;
2796 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2797 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2798 // since the lexer only gives us positive digit strings, but we'll
2799 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
// NOTE(review): the errno tests below are only meaningful if errno was
// cleared before this call; that reset is not visible in this excerpt.
2801 long long value
= (long long) strtoull (startp
, & endp
, 0);
// NOTE(review): assuming 64-bit (unsigned) long long, the last two range
// tests can never be true; they look purely defensive.
2802 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
2803 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
2804 || (unsigned long long) value
> 18446744073709551615ULL
2805 || value
< -9223372036854775807LL-1)
2806 throw PARSE_ERROR (_("number invalid or out of range"));
2811 l
= new literal_number (value
);
2814 throw PARSE_ERROR (_("expected literal string or number"));
// Parse "if (condition) statement [else statement]" into an if_statement.
// The 'if' keyword and both parentheses are mandatory and each has its own
// error message.  The condition comes from parse_expression() and the
// branches from parse_statement().  When no 'else' keyword follows,
// elseblock is explicitly set to 0.
// NOTE(review): the token fetches between the checks are not visible in
// this excerpt.
2823 parser::parse_if_statement ()
2825 const token
* t
= next ();
2826 if (! (t
->type
== tok_keyword
&& t
->content
== "if"))
2827 throw PARSE_ERROR (_("expected 'if'"));
2828 if_statement
* s
= new if_statement
;
2832 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2833 throw PARSE_ERROR (_("expected '('"));
2836 s
->condition
= parse_expression ();
2839 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2840 throw PARSE_ERROR (_("expected ')'"));
2843 s
->thenblock
= parse_statement ();
// Optional else-branch.
2846 if (t
&& t
->type
== tok_keyword
&& t
->content
== "else")
2849 s
->elseblock
= parse_statement ();
2852 s
->elseblock
= 0; // in case not otherwise initialized
// Parse an expression used as a statement.  The first token is only
// peeked, so the statement's tok is given its own copy of that token
// before parse_expression() consumes input.
// NOTE(review): the condition guarding the "expression statement expected"
// error is not visible here (presumably a missing token) -- confirm.
2859 parser::parse_expr_statement ()
2861 expr_statement
*es
= new expr_statement
;
2862 const token
* t
= peek ();
2864 throw PARSE_ERROR (_("expression statement expected"));
2865 // Copy, we only peeked, parse_expression might swallow.
2866 es
->tok
= new token (*t
);
2867 es
->value
= parse_expression ();
// Parse a 'return' statement.  It is rejected unless the parser's context
// is con_function.  If the token examined next is the operator ';' or '}',
// the statement has no value (value = NULL); otherwise the value is read
// with parse_expression().
2873 parser::parse_return_statement ()
2875 const token
* t
= next ();
2876 if (! (t
->type
== tok_keyword
&& t
->content
== "return"))
2877 throw PARSE_ERROR (_("expected 'return'"));
2878 if (context
!= con_function
)
2879 throw PARSE_ERROR (_("found 'return' not in function context"));
2880 return_statement
* s
= new return_statement
;
2884 if (t
->type
== tok_operator
&& (t
->content
== ";" || t
->content
== "}"))
2885 s
->value
= NULL
; // no return value
2887 s
->value
= parse_expression ();
// Parse a 'delete' statement: the keyword is mandatory and the operand is
// read with parse_expression() into the new delete_statement's value.
2893 parser::parse_delete_statement ()
2895 const token
* t
= next ();
2896 if (! (t
->type
== tok_keyword
&& t
->content
== "delete"))
2897 throw PARSE_ERROR (_("expected 'delete'"));
2898 delete_statement
* s
= new delete_statement
;
2900 s
->value
= parse_expression ();
// Parse a 'next' statement: the next token must be the keyword "next",
// after which a next_statement node is allocated.
2906 parser::parse_next_statement ()
2908 const token
* t
= next ();
2909 if (! (t
->type
== tok_keyword
&& t
->content
== "next"))
2910 throw PARSE_ERROR (_("expected 'next'"));
2911 next_statement
* s
= new next_statement
;
// Parse a 'break' statement: the next token must be the keyword "break",
// after which a break_statement node is allocated.
2918 parser::parse_break_statement ()
2920 const token
* t
= next ();
2921 if (! (t
->type
== tok_keyword
&& t
->content
== "break"))
2922 throw PARSE_ERROR (_("expected 'break'"));
2923 break_statement
* s
= new break_statement
;
// Parse a 'continue' statement: the next token must be the keyword
// "continue", after which a continue_statement node is allocated.
2930 parser::parse_continue_statement ()
2932 const token
* t
= next ();
2933 if (! (t
->type
== tok_keyword
&& t
->content
== "continue"))
2934 throw PARSE_ERROR (_("expected 'continue'"));
2935 continue_statement
* s
= new continue_statement
;
// Parse "for (init; cond; incr) statement" into a for_loop node.
// Each of the three header clauses may be empty:
//  - init and incr, when present, are read with parse_expr_statement();
//  - cond, when present, is read with parse_expression(); when absent a
//    literal_number(1) is created in its place.
// The separators ';', ';' and ')' are checked with dedicated error
// messages, and the body is read with parse_statement().
// NOTE(review): the handling of the empty init / incr branches and the
// token fetches are not visible in this excerpt.
2942 parser::parse_for_loop ()
2944 const token
* t
= next ();
2945 if (! (t
->type
== tok_keyword
&& t
->content
== "for"))
2946 throw PARSE_ERROR (_("expected 'for'"));
2947 for_loop
* s
= new for_loop
;
2951 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2952 throw PARSE_ERROR (_("expected '('"));
2955 // initializer + ";"
2957 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2964 s
->init
= parse_expr_statement ();
2966 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
2967 throw PARSE_ERROR (_("expected ';'"));
// Condition clause: an immediate ';' means no condition was written.
2973 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2975 literal_number
* l
= new literal_number(1);
2977 s
->cond
->tok
= next ();
2981 s
->cond
= parse_expression ();
2983 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
2984 throw PARSE_ERROR (_("expected ';'"));
// Increment clause: an immediate ')' means no increment was written.
2990 if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
2997 s
->incr
= parse_expr_statement ();
2999 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
3000 throw PARSE_ERROR (_("expected ')'"));
3005 s
->block
= parse_statement ();
// Parse "while (cond) statement".  The loop is represented with a for_loop
// node: only the condition (parse_expression) and the body
// (parse_statement) are parsed, while init and incr are dummies as the
// comment below says.
3012 parser::parse_while_loop ()
3014 const token
* t
= next ();
3015 if (! (t
->type
== tok_keyword
&& t
->content
== "while"))
3016 throw PARSE_ERROR (_("expected 'while'"));
3017 for_loop
* s
= new for_loop
;
3021 if (! (t
->type
== tok_operator
&& t
->content
== "("))
3022 throw PARSE_ERROR (_("expected '('"));
3025 // dummy init and incr fields
3030 s
->cond
= parse_expression ();
3033 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
3034 throw PARSE_ERROR (_("expected ')'"));
3038 s
->block
= parse_statement ();
// Parse a foreach loop header and body into a foreach_loop node.
// Visible steps, in order:
//  1. 'foreach' keyword and '('.
//  2. One identifier of lookahead (plus an optional '+' / '-' sort mark).
//     If '=' follows, that identifier becomes the loop's value symbol;
//     otherwise it is used as the first index.
//  3. The index identifiers, optionally written as a '[' ... ']' list;
//     a '+' / '-' after an index selects that column for sorting.
//  4. The 'in' keyword and the array, read with parse_indexable().
//  5. An optional '[' ... ']' slice, where '*' is stored as a NULL entry.
//  6. An optional @avg/@min/@max/@count/@sum/@variance aggregate, which
//     must be followed by a '+' / '-' sort directive.
//  7. An optional '+' / '-' sort directive on the array itself.
//  8. An optional 'limit' expression, the closing ')', and the body.
// More than one sort directive raises "multiple sort directives".
// NOTE(review): loop constructs, token fetches and several assignments are
// not visible in this excerpt -- confirm the details against the full file.
3045 parser::parse_foreach_loop ()
3047 const token
* t
= next ();
3048 if (! (t
->type
== tok_keyword
&& t
->content
== "foreach"))
3049 throw PARSE_ERROR (_("expected 'foreach'"));
3050 foreach_loop
* s
= new foreach_loop
;
3052 s
->sort_direction
= 0;
3053 s
->sort_aggr
= sc_none
;
3058 if (! (t
->type
== tok_operator
&& t
->content
== "("))
3059 throw PARSE_ERROR (_("expected '('"));
// One identifier of lookahead: it is either the value symbol ("v = ...")
// or the first index, which is only known once the following token is seen.
3062 symbol
* lookahead_sym
= NULL
;
3063 int lookahead_sort
= 0;
3066 if (t
&& t
->type
== tok_identifier
)
3069 lookahead_sym
= new symbol
;
3070 lookahead_sym
->tok
= t
;
3071 lookahead_sym
->name
= t
->content
;
3074 if (t
&& t
->type
== tok_operator
&&
3075 (t
->content
== "+" || t
->content
== "-"))
3077 lookahead_sort
= (t
->content
== "+") ? 1 : -1;
3082 if (t
&& t
->type
== tok_operator
&& t
->content
== "=")
3085 s
->value
= lookahead_sym
;
3088 s
->sort_direction
= lookahead_sort
;
3091 lookahead_sym
= NULL
;
3095 // see also parse_array_in
3097 bool parenthesized
= false;
3099 if (!lookahead_sym
&& t
&& t
->type
== tok_operator
&& t
->content
== "[")
3102 parenthesized
= true;
3107 s
->indexes
.push_back (lookahead_sym
);
3110 s
->sort_direction
= lookahead_sort
;
3113 lookahead_sym
= NULL
;
3118 if (! (t
->type
== tok_identifier
))
3119 throw PARSE_ERROR (_("expected identifier"));
3120 symbol
* sym
= new symbol
;
3122 sym
->name
= t
->content
;
3123 s
->indexes
.push_back (sym
);
3126 if (t
&& t
->type
== tok_operator
&&
3127 (t
->content
== "+" || t
->content
== "-"))
3129 if (s
->sort_direction
)
3130 throw PARSE_ERROR (_("multiple sort directives"));
3131 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
// The sort column is the 1-based position of the index just pushed.
3132 s
->sort_column
= s
->indexes
.size();
3139 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3144 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
3150 throw PARSE_ERROR (_("expected ',' or ']'"));
3153 break; // expecting only one expression
3157 if (! (t
->type
== tok_keyword
&& t
->content
== "in"))
3158 throw PARSE_ERROR (_("expected 'in'"));
3161 s
->base
= parse_indexable();
3163 // check if there was an array slice that was specified
3165 if (t
&& t
->type
== tok_operator
&& t
->content
== "[")
3171 if (t
&& t
->type
== tok_operator
&& t
->content
== "*")
3174 s
->array_slice
.push_back (NULL
);
3177 s
->array_slice
.push_back (parse_expression());
3180 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3185 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
3191 throw PARSE_ERROR (_("expected ',' or ']'"));
3196 // check for atword, see also expect_ident_or_atword,
3198 if (t
&& t
->type
== tok_operator
&& t
->content
[0] == '@')
3200 if (t
->content
== "@avg") s
->sort_aggr
= sc_average
;
3201 else if (t
->content
== "@min") s
->sort_aggr
= sc_min
;
3202 else if (t
->content
== "@max") s
->sort_aggr
= sc_max
;
3203 else if (t
->content
== "@count") s
->sort_aggr
= sc_count
;
3204 else if (t
->content
== "@sum") s
->sort_aggr
= sc_sum
;
3205 else if (t
->content
== "@variance") s
->sort_aggr
= sc_variance
;
3206 else throw PARSE_ERROR(_("expected statistical operation"));
3210 if (! (t
&& t
->type
== tok_operator
&& (t
->content
== "+" || t
->content
== "-")))
3211 throw PARSE_ERROR(_("expected sort directive"));
3215 if (t
&& t
->type
== tok_operator
&&
3216 (t
->content
== "+" || t
->content
== "-"))
3218 if (s
->sort_direction
)
3219 throw PARSE_ERROR (_("multiple sort directives"));
3220 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
3226 if (tok_is(t
, tok_keyword
, "limit"))
3228 swallow (); // get past the "limit"
3229 s
->limit
= parse_expression ();
// NOTE(review): unlike every other message in this function, the string
// below is not wrapped in _() -- confirm whether that is intentional.
3233 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
3234 throw PARSE_ERROR ("expected ')'");
3237 s
->block
= parse_statement ();
// Entry point of the expression grammar: simply forwards to
// parse_assignment().
3243 parser::parse_expression ()
3245 return parse_assignment ();
// Parse an assignment-level expression.  The left operand comes from
// parse_ternary().  If one of the assignment operators listed below
// follows, an assignment node is created whose right-hand side is parsed
// by a recursive parse_expression() call, which makes these operators
// right-associative.
// NOTE(review): the end of the operator list and the remaining node setup
// are not visible in this excerpt.
3250 parser::parse_assignment ()
3252 expression
* op1
= parse_ternary ();
3254 const token
* t
= peek ();
3255 // right-associative operators
3256 if (t
&& t
->type
== tok_operator
3257 && (t
->content
== "=" ||
3258 t
->content
== "<<<" ||
3259 t
->content
== "+=" ||
3260 t
->content
== "-=" ||
3261 t
->content
== "*=" ||
3262 t
->content
== "/=" ||
3263 t
->content
== "%=" ||
3264 t
->content
== "<<=" ||
3265 t
->content
== ">>=" ||
3266 t
->content
== "&=" ||
3267 t
->content
== "^=" ||
3268 t
->content
== "|=" ||
3269 t
->content
== ".=" ||
3272 // NB: lvalueness is checked during elaboration / translation
3273 assignment
* e
= new assignment
;
3278 e
->right
= parse_expression ();
// Parse "cond ? a : b".  The condition comes from parse_logical_or(); when
// a '?' operator follows, a ternary_expression is built.  The true branch
// is a full parse_expression(); ':' is mandatory.  The false branch is
// parsed with parse_ternary() when has_version("4.0") holds, and with
// parse_expression() otherwise.
3287 parser::parse_ternary ()
3289 expression
* op1
= parse_logical_or ();
3291 const token
* t
= peek ();
3292 if (t
&& t
->type
== tok_operator
&& t
->content
== "?")
3294 ternary_expression
* e
= new ternary_expression
;
3298 e
->truevalue
= parse_expression (); // XXX
3301 if (! (t
->type
== tok_operator
&& t
->content
== ":"))
3302 throw PARSE_ERROR (_("expected ':'"));
// The false branch's grammar depends on the script compatibility version.
3305 if (input
.has_version("4.0"))
3306 e
->falsevalue
= parse_ternary ();
3308 e
->falsevalue
= parse_expression ();
// Parse a chain of "||" operations.  Operands come from
// parse_logical_and(); each "||" seen produces a logical_or_expr node.
3317 parser::parse_logical_or ()
3319 expression
* op1
= parse_logical_and ();
3321 const token
* t
= peek ();
3322 while (t
&& t
->type
== tok_operator
&& t
->content
== "||")
3324 logical_or_expr
* e
= new logical_or_expr
;
3329 e
->right
= parse_logical_and ();
// Parse a chain of "&&" operations.  Operands come from
// parse_boolean_or(); each "&&" seen produces a logical_and_expr node.
3339 parser::parse_logical_and ()
3341 expression
* op1
= parse_boolean_or ();
3343 const token
* t
= peek ();
3344 while (t
&& t
->type
== tok_operator
&& t
->content
== "&&")
3346 logical_and_expr
*e
= new logical_and_expr
;
3351 e
->right
= parse_boolean_or ();
// Parse a chain of bitwise "|" operations.  Operands come from
// parse_boolean_xor(); each "|" seen produces a binary_expression node.
3361 parser::parse_boolean_or ()
3363 expression
* op1
= parse_boolean_xor ();
3365 const token
* t
= peek ();
3366 while (t
&& t
->type
== tok_operator
&& t
->content
== "|")
3368 binary_expression
* e
= new binary_expression
;
3373 e
->right
= parse_boolean_xor ();
// Parse a chain of bitwise "^" operations.  Operands come from
// parse_boolean_and(); each "^" seen produces a binary_expression node.
3383 parser::parse_boolean_xor ()
3385 expression
* op1
= parse_boolean_and ();
3387 const token
* t
= peek ();
3388 while (t
&& t
->type
== tok_operator
&& t
->content
== "^")
3390 binary_expression
* e
= new binary_expression
;
3395 e
->right
= parse_boolean_and ();
// Parse a chain of bitwise "&" operations.  Operands come from
// parse_array_in(); each "&" seen produces a binary_expression node.
3405 parser::parse_boolean_and ()
3407 expression
* op1
= parse_array_in ();
3409 const token
* t
= peek ();
3410 while (t
&& t
->type
== tok_operator
&& t
->content
== "&")
3412 binary_expression
* e
= new binary_expression
;
3417 e
->right
= parse_array_in ();
// Parse an array-membership test ("a in b", "[a,b] in c") or, when no 'in'
// keyword follows, a single ordinary expression.
// A leading '[' starts a bracketed index list; inside it a '*' operator is
// stored as a NULL index, and every other entry is read with
// parse_comparison_or_regex_query().  If the keyword 'in' follows, an
// array_in node is built around an arrayindex whose base comes from
// parse_indexable().  Without 'in', exactly one collected expression is
// acceptable; otherwise "unexpected comma-separated expression list" is
// raised.
// NOTE(review): the enclosing loop and the return statements are not
// visible in this excerpt.
3427 parser::parse_array_in ()
3429 // This is a very tricky case. All these are legit expressions:
3430 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3431 vector
<expression
*> indexes
;
3432 bool parenthesized
= false;
3434 const token
* t
= peek ();
3435 if (t
&& t
->type
== tok_operator
&& t
->content
== "[")
3438 parenthesized
= true;
// '*' is only treated as an index placeholder inside a bracketed list.
3444 if (t
&& t
->type
== tok_operator
&& t
->content
== "*" && parenthesized
)
3447 indexes
.push_back(NULL
);
3451 expression
* op1
= parse_comparison_or_regex_query ();
3452 indexes
.push_back (op1
);
3457 const token
* t
= peek ();
3458 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3463 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
3469 throw PARSE_ERROR (_("expected ',' or ']'"));
3472 break; // expecting only one expression
3476 if (t
&& t
->type
== tok_keyword
&& t
->content
== "in")
3478 array_in
*e
= new array_in
;
3482 arrayindex
* a
= new arrayindex
;
3483 a
->indexes
= indexes
;
3484 a
->base
= parse_indexable();
3485 a
->tok
= a
->base
->tok
;
3489 else if (indexes
.size() == 1) // no "in" - need one expression only
3492 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
// Parse either a regex match or a chain of comparisons.  The left operand
// comes from parse_shift().  If "=~" or "!~" follows, a regex_query is
// built whose right side must be a literal string.  Otherwise every
// following > < == != <= >= operator builds a comparison node whose right
// side comes from parse_shift().
3497 parser::parse_comparison_or_regex_query ()
3499 expression
* op1
= parse_shift ();
3501 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3502 const token
*t
= peek();
3503 if (t
&& t
->type
== tok_operator
3504 && (t
->content
== "=~" ||
3505 t
->content
== "!~"))
3507 regex_query
* r
= new regex_query
;
3512 r
->right
= parse_literal_string();
3516 else while (t
&& t
->type
== tok_operator
3517 && (t
->content
== ">" ||
3518 t
->content
== "<" ||
3519 t
->content
== "==" ||
3520 t
->content
== "!=" ||
3521 t
->content
== "<=" ||
3522 t
->content
== ">="))
3524 comparison
* e
= new comparison
;
3529 e
->right
= parse_shift ();
// Parse a chain of shift operations ("<<", ">>", ">>>").  Operands come
// from parse_concatenation(); each operator seen produces a
// binary_expression node.
3539 parser::parse_shift ()
3541 expression
* op1
= parse_concatenation ();
3543 const token
* t
= peek ();
3544 while (t
&& t
->type
== tok_operator
&&
3545 (t
->content
== "<<" || t
->content
== ">>" || t
->content
== ">>>"))
3547 binary_expression
* e
= new binary_expression
;
3552 e
->right
= parse_concatenation ();
// Parse a chain of string concatenations written with the "." operator.
// Operands come from parse_additive(); each "." seen produces a
// concatenation node.
3562 parser::parse_concatenation ()
3564 expression
* op1
= parse_additive ();
3566 const token
* t
= peek ();
3567 // XXX: the actual awk string-concatenation operator is *whitespace*.
3568 // I don't know how to easily model that here.
3569 while (t
&& t
->type
== tok_operator
&& t
->content
== ".")
3571 concatenation
* e
= new concatenation
;
3576 e
->right
= parse_additive ();
// Parse a chain of "+" / "-" operations.  Operands come from
// parse_multiplicative(); each operator seen produces a binary_expression
// node.
3586 parser::parse_additive ()
3588 expression
* op1
= parse_multiplicative ();
3590 const token
* t
= peek ();
3591 while (t
&& t
->type
== tok_operator
3592 && (t
->content
== "+" || t
->content
== "-"))
3594 binary_expression
* e
= new binary_expression
;
3599 e
->right
= parse_multiplicative ();
// Parse a chain of "*", "/" and "%" operations.  Operands come from
// parse_unary(); each operator seen produces a binary_expression node.
3609 parser::parse_multiplicative ()
3611 expression
* op1
= parse_unary ();
3613 const token
* t
= peek ();
3614 while (t
&& t
->type
== tok_operator
3615 && (t
->content
== "*" || t
->content
== "/" || t
->content
== "%"))
3617 binary_expression
* e
= new binary_expression
;
3622 e
->right
= parse_unary ();
// Parse a prefix unary operator.  When the next token is one of the
// operators listed below ("+", "-", "!", "~", ...), a unary_expression is
// built whose operand is parsed by a recursive parse_unary() call, so
// prefix operators may be stacked.  Otherwise parsing continues with
// parse_crement().
// NOTE(review): the end of the operator list is not visible in this excerpt.
3632 parser::parse_unary ()
3634 const token
* t
= peek ();
3635 if (t
&& t
->type
== tok_operator
3636 && (t
->content
== "+" ||
3637 t
->content
== "-" ||
3638 t
->content
== "!" ||
3639 t
->content
== "~" ||
3642 unary_expression
* e
= new unary_expression
;
3646 e
->operand
= parse_unary ();
3650 return parse_crement ();
// Parse prefix and postfix "++" / "--".  A leading operator builds a
// pre_crement whose operand comes from parse_dwarf_value().  Otherwise the
// operand is parsed first and a trailing "++" / "--" builds a post_crement.
3655 parser::parse_crement () // as in "increment" / "decrement"
3657 // NB: Ideally, we'd parse only a symbol as an operand to the
3658 // *crement operators, instead of a general expression value. We'd
3659 // need more complex lookahead code to tell apart the postfix cases.
3660 // So we just punt, and leave it to pass-3 to signal errors on
3661 // cases like "4++".
3663 const token
* t
= peek ();
3664 if (t
&& t
->type
== tok_operator
3665 && (t
->content
== "++" || t
->content
== "--"))
3667 pre_crement
* e
= new pre_crement
;
3671 e
->operand
= parse_dwarf_value ();
3675 // post-crement or non-crement
3676 expression
*op1
= parse_dwarf_value ();
3679 if (t
&& t
->type
== tok_operator
3680 && (t
->content
== "++" || t
->content
== "--"))
3682 post_crement
* e
= new post_crement
;
// Parse a value that may be a target symbol ($var, @cast, @var), with an
// optional leading '&' (address-of) and trailing target-symbol components.
//  - A leading '&' token is consumed and remembered in addrtok.
//  - $identifier, @cast and @var go to their dedicated parsers.
//  - With '&' but without has_version("2.6"), anything else is an error.
//  - Otherwise a plain value is parsed; taking the address of something
//    that already has addressof set is an error.
//  - From version 2.6 on, a non-target value that had '&' or is followed
//    by target-symbol components is wrapped in an autocast_op.
// Finally addressof is stored on the target symbol and its components are
// parsed with parse_target_symbol_components().
// NOTE(review): the guards around some of these steps are not visible in
// this excerpt.
3695 parser::parse_dwarf_value ()
3697 expression
* expr
= NULL
;
3698 target_symbol
* tsym
= NULL
;
3700 // With '&' we'll definitely be making a target symbol of some sort
3701 const token
* addrtok
= peek_op ("&") ? next () : NULL
;
3702 bool addressof
= (addrtok
!= NULL
);
3704 // First try target_symbol types: $var, @cast, and @var.
3705 const token
* t
= peek ();
3706 if (t
&& t
->type
== tok_identifier
&& t
->content
[0] == '$')
3707 expr
= tsym
= parse_target_symbol ();
3708 else if (tok_is (t
, tok_operator
, "@cast"))
3709 expr
= tsym
= parse_cast_op ();
3710 else if (tok_is (t
, tok_operator
, "@var"))
3711 expr
= tsym
= parse_atvar_op ();
3712 else if (addressof
&& !input
.has_version("2.6"))
3713 // '&' on old version only allowed specific target_symbol types
3714 throw PARSE_ERROR (_("expected @cast, @var or $var"));
3717 // Otherwise just get a plain value of any sort.
3718 expr
= parse_value ();
3721 tsym
= dynamic_cast<target_symbol
*> (expr
);
3722 if (tsym
&& tsym
->addressof
)
3723 throw PARSE_ERROR (_("cannot take address more than once"),
3728 // If we had '&' or see any target suffixes, that forces a target_symbol.
3729 // For compatibility, we only do this starting with 2.6.
3730 if (!tsym
&& (addressof
|| peek_target_symbol_components ())
3731 && input
.has_version("2.6"))
3733 autocast_op
*cop
= new autocast_op
;
// "a ?: b" is the GNU conditional with omitted middle operand: addrtok
// when non-null, else peek().
3734 cop
->tok
= addrtok
?: peek ();
3735 cop
->operand
= expr
;
3741 // Parse the rest of any kind of target symbol
3742 tsym
->addressof
= addressof
;
3743 parse_target_symbol_components (tsym
);
// Parse a primary value, selected by the peeked token:
//  - tok_embedded: an embedded_expr carrying the token's content as code
//    (an error mentioning "need stap -g" exists on this path; its guard is
//    not visible in this excerpt);
//  - "(": a parenthesized expression, which must be closed by ")";
//  - an identifier or an operator starting with '@': parse_symbol();
//  - anything else: parse_literal().
3751 parser::parse_value ()
3753 const token
* t
= peek ();
3755 throw PARSE_ERROR (_("expected value"));
3757 if (t
->type
== tok_embedded
)
3760 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3762 embedded_expr
*e
= new embedded_expr
;
3764 e
->code
= t
->content
;
3769 if (t
->type
== tok_operator
&& t
->content
== "(")
3772 expression
* e
= parse_expression ();
3774 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
3775 throw PARSE_ERROR (_("expected ')'"));
3779 else if (t
->type
== tok_identifier
3780 || (t
->type
== tok_operator
&& t
->content
[0] == '@'))
3781 return parse_symbol ();
3783 return parse_literal ();
// Read an identifier or @word into `name` via expect_ident_or_atword().
// If the name is @hist_linear or @hist_log, `hop` is filled in: htype is
// set accordingly and the statistic operand is read with
// parse_expression().  For @hist_linear three further numbers are read
// with expect_number() and appended to hop->params.
// NOTE(review): the allocation of hop, the punctuation between arguments
// and the return statement are not visible in this excerpt.
3788 parser::parse_hist_op_or_bare_name (hist_op
*&hop
, interned_string
&name
)
3791 const token
* t
= expect_ident_or_atword (name
);
3792 if (name
== "@hist_linear" || name
== "@hist_log")
3795 if (name
== "@hist_linear")
3796 hop
->htype
= hist_linear
;
3797 else if (name
== "@hist_log")
3798 hop
->htype
= hist_log
;
3801 hop
->stat
= parse_expression ();
3803 if (hop
->htype
== hist_linear
)
// A linear histogram takes exactly three numeric parameters.
3805 for (size_t i
= 0; i
< 3; ++i
)
3808 expect_number (tnum
);
3809 hop
->params
.push_back (tnum
);
// Parse something that can be indexed: a histogram operator or a bare
// name, both obtained from parse_hist_op_or_bare_name().  A symbol node is
// allocated here, presumably for the bare-name case -- the selecting
// condition and the return are not visible in this excerpt; confirm.
3819 parser::parse_indexable ()
3821 hist_op
*hop
= NULL
;
3822 interned_string name
;
3823 const token
*tok
= parse_hist_op_or_bare_name(hop
, name
);
3828 symbol
* sym
= new symbol
;
3836 // var, indexable[index], func(parms), printf("...", ...),
3837 // @defined, @entry, @stat_op(stat)
// Dispatch on the name obtained from parse_hist_op_or_bare_name():
//  - @defined / @const / @entry / @perf go to their dedicated parsers;
//  - with has_version("4.0"), @kregister / @uregister and @kderef /
//    @uderef go to parse_target_register() / parse_target_deref();
//  - any other '@' name must be a statistics operator (@avg, @variance,
//    @count, @sum, @min, @max), producing a stat_op; only @variance raises
//    max_params to 1, and additional numeric parameters beyond max_params
//    are rejected;
//  - names recognized by print_format::create() are parsed as the print
//    family, including the print(@hist_...(x)) special case;
//  - a following "(" makes it a function call;
//  - finally an optional "[" ... "]" index list is parsed, and a histogram
//    operator left on its own is reported as an error.
// NOTE(review): many guards, loops and returns are not visible in this
// excerpt -- confirm the details against the full file.
3838 expression
* parser::parse_symbol ()
3840 hist_op
*hop
= NULL
;
3842 interned_string name
;
3843 unsigned max_params
= 0;
3844 const token
*t
= parse_hist_op_or_bare_name(hop
, name
);
3848 // If we didn't get a hist_op, then we did get an identifier. We can
3849 // now scrutinize this identifier for the various magic forms of identifier
3850 // (printf, @stat_op...)
3852 // NB: PR11343: @defined() is not incompatible with earlier versions
3853 // of stap, so no need to check session.compatible for 1.2
3854 if (name
== "@defined")
3855 return parse_defined_op (t
);
3857 if (name
== "@const")
3858 return parse_const_op (t
);
3860 if (name
== "@entry")
3861 return parse_entry_op (t
);
3863 if (name
== "@perf")
3864 return parse_perf_op (t
);
// The register / deref operators are only recognized from version 4.0 on.
3866 if (input
.has_version("4.0"))
3868 if (name
== "@kregister" || name
== "@uregister")
3869 return parse_target_register (t
);
3871 if (name
== "@kderef" || name
== "@uderef")
3872 return parse_target_deref (t
);
// Any remaining '@' name must be a statistics operator.
3875 if (name
.size() > 0 && name
[0] == '@')
3877 stat_op
*sop
= new stat_op
;
3879 sop
->ctype
= sc_average
;
3880 else if (name
== "@variance")
3881 sop
->ctype
= sc_variance
, max_params
= 1;
3882 else if (name
== "@count")
3883 sop
->ctype
= sc_count
;
3884 else if (name
== "@sum")
3885 sop
->ctype
= sc_sum
;
3886 else if (name
== "@min")
3887 sop
->ctype
= sc_min
;
3888 else if (name
== "@max")
3889 sop
->ctype
= sc_max
;
3891 throw PARSE_ERROR(_F("unknown operator %s",
3892 name
.to_string().c_str()));
3895 sop
->stat
= parse_expression ();
3900 if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
3905 else if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3907 if (sop
->params
.size() >= max_params
)
3908 throw PARSE_ERROR(_NF("not more than %d parameter allowed",
3909 "not more than %d parameters allowed",
3910 max_params
+1, max_params
+1), t
);
3914 expect_number (tnum
);
3915 sop
->params
.push_back (tnum
);
3921 else if (print_format
*fmt
= print_format::create(t
))
3924 if ((name
== "print" || name
== "println" ||
3925 name
== "sprint" || name
== "sprintln") &&
3926 (peek_op("@hist_linear") || peek_op("@hist_log")))
3928 // We have a special case where we recognize
3929 // print(@hist_foo(bar)) as a magic print-the-histogram
3930 // construct. This is sort of gross but it avoids
3931 // promoting histogram references to typeful
3935 t
= parse_hist_op_or_bare_name(hop
, name
);
3938 // It is, sadly, possible that even while parsing a
3939 // hist_op, we *mis-guessed* and the user wishes to
3940 // print(@hist_op(foo)[bucket]), a scalar. In that case
3941 // we must parse the arrayindex and print an expression.
3943 // XXX: This still fails if the arrayindex is part of a
3944 // larger expression. To really handle everything, we'd
3945 // need to push back all the hist tokens and start over.
3951 // This is simplified version of the
3952 // multi-array-index parser below, because we can
3953 // only ever have one index on a histogram anyways.
3955 struct arrayindex
* ai
= new arrayindex
;
3958 ai
->indexes
.push_back (parse_expression ());
3960 fmt
->args
.push_back(ai
);
3962 // Consume any subsequent arguments.
3963 while (!peek_op(")"))
3965 // ')' is not possible here but we want to output a nicer
3966 // parser error message.
3967 (void) expect_op_any ({",", ")"});
3968 expression
*e
= parse_expression ();
3969 fmt
->args
.push_back(e
);
3976 bool consumed_arg
= false;
3977 if (fmt
->print_with_format
)
3979 // Consume and convert a format string. Agreement between the
3980 // format string and the arguments is postponed to the
3981 // typechecking phase.
3982 literal_string
* ls
= parse_literal_string();
3983 fmt
->raw_components
= ls
->value
;
3985 fmt
->components
= print_format::string_to_components (fmt
->raw_components
);
3986 consumed_arg
= true;
3988 else if (fmt
->print_with_delim
)
3990 // Consume a delimiter to separate arguments.
3991 literal_string
* ls
= parse_literal_string();
3992 fmt
->delimiter
= ls
->value
;
3994 consumed_arg
= true;
3995 min_args
= 2; // so that the delim is used at least once
3997 else if (!fmt
->print_with_newline
)
3999 // If we are not printing with a format string, nor with a
4000 // delim, nor with a newline, then it's either print() or
4001 // sprint(), both of which require at least one argument (of
4006 // Consume any subsequent arguments.
4007 while (min_args
|| !peek_op (")"))
4009 // ')' is not possible here but we want to output a nicer
4010 // parser error message.
4012 (void) expect_op_any({",", ")"});
4013 expression
*e
= parse_expression ();
4014 fmt
->args
.push_back(e
);
4015 consumed_arg
= true;
4024 else if (peek_op ("(")) // function call
4027 struct functioncall
* f
= new functioncall
;
4030 // Allow empty actual parameter list
4038 f
->args
.push_back (parse_expression ());
4039 interned_string op
= expect_op_any({")", ","});
4056 // By now, either we had a hist_op in the first place, or else
4057 // we had a plain word and it was converted to a symbol.
4059 assert (!hop
!= !sym
); // logical XOR
4061 // All that remains is to check for array indexing
4063 if (peek_op ("[")) // array
4066 struct arrayindex
* ai
= new arrayindex
;
4079 ai
->indexes
.push_back (NULL
);
4082 ai
->indexes
.push_back (parse_expression ());
4083 interned_string op
= expect_op_any({"]", ","});
4093 // If we got to here, we *should* have a symbol; if we have
4094 // a hist_op on its own, it doesn't count as an expression,
4095 // so we throw a parse error.
4098 throw PARSE_ERROR(_("base histogram operator where expression expected"), t
);
// Parse a "$name" target variable.  The next token must be an identifier
// whose first character is '$'; its text becomes the target_symbol's
// name.  Anything else raises "expected $var".
4104 target_symbol
* parser::parse_target_symbol ()
4106 const token
* t
= next ();
4107 if (t
->type
== tok_identifier
&& t
->content
[0]=='$')
4109 // target_symbol time
4110 target_symbol
*tsym
= new target_symbol
;
4112 tsym
->name
= t
->content
;
4116 throw PARSE_ERROR (_("expected $var"));
// Parse a @cast operation.  The next token must be the operator "@cast".
// The operand is read with parse_expression(), followed by a string token
// giving the type name (which must not be empty) and a further string
// token stored as the module.
// NOTE(review): the punctuation handling and the guard that makes the
// module argument optional (if any) are not visible in this excerpt.
4121 cast_op
* parser::parse_cast_op ()
4123 const token
* t
= next ();
4124 if (t
->type
== tok_operator
&& t
->content
== "@cast")
4126 cast_op
*cop
= new cast_op
;
4128 cop
->name
= t
->content
;
4130 cop
->operand
= parse_expression ();
4132 expect_unknown(tok_string
, cop
->type_name
);
4133 if (cop
->type_name
.empty())
4134 throw PARSE_ERROR (_("expected non-empty string"));
4138 expect_unknown(tok_string
, cop
->module
);
4144 throw PARSE_ERROR (_("expected @cast"));
// Parse a @var operation.  The next token must be the operator "@var".
// A string token supplies target_name; if it contains '@', the text after
// the first '@' is stored as cu_name.  A further string token is stored as
// the module.
// NOTE(review): the punctuation handling and the guard around the module
// argument are not visible in this excerpt.
4149 atvar_op
* parser::parse_atvar_op ()
4151 const token
* t
= next ();
4152 if (t
->type
== tok_operator
&& t
->content
== "@var")
4154 atvar_op
*aop
= new atvar_op
;
4156 aop
->name
= t
->content
;
4158 expect_unknown(tok_string
, aop
->target_name
);
4159 size_t found_at
= aop
->target_name
.find("@");
4160 if (found_at
!= string::npos
)
4161 aop
->cu_name
= aop
->target_name
.substr(found_at
+ 1);
4167 expect_unknown (tok_string
, aop
->module
);
4175 throw PARSE_ERROR (_("expected @var"));
4179 // Parse a @defined(). Given head token has already been consumed.
// Allocates a defined_op and reads its operand with parse_expression().
4180 expression
* parser::parse_defined_op (const token
* t
)
4182 defined_op
* dop
= new defined_op
;
4185 dop
->operand
= parse_expression ();
4191 // Parse a @const(). Given head token has already been consumed.
// Reads one string token and turns it into an embedded_expr whose code is
// that string prefixed with the "pure / unprivileged / stable" marker
// comments.  Two errors exist on this path: "need stap -g" and
// "expected non-empty string"; their guarding conditions are not visible
// in this excerpt.
4192 expression
* parser::parse_const_op (const token
* t
)
4195 throw PARSE_ERROR (_("using @const operator not permitted; need stap -g"),
4196 false /* don't skip tokens for parse resumption */);
4198 interned_string cnst
;
4199 embedded_expr
*ee
= new embedded_expr
;
4202 expect_unknown(tok_string
, cnst
);
4204 throw PARSE_ERROR (_("expected non-empty string"));
4206 ee
->code
= string("/* pure */ /* unprivileged */ /* stable */ ") + string(cnst
);
4211 // Parse a @entry(). Given head token has already been consumed.
// Allocates an entry_op and reads its operand with parse_expression().
4212 expression
* parser::parse_entry_op (const token
* t
)
4214 entry_op
* eop
= new entry_op
;
4217 eop
->operand
= parse_expression ();
4223 // Parse a @perf(). Given head token has already been consumed.
// The operand must be a literal string (parse_literal_string) and must not
// be empty.
4224 expression
* parser::parse_perf_op (const token
* t
)
4226 perf_op
* pop
= new perf_op
;
4229 pop
->operand
= parse_literal_string ();
4230 if (pop
->operand
->value
== "")
4231 throw PARSE_ERROR (_("expected non-empty string"));
4236 // Parse a @kregister or @uregister. Given head token has already been consumed.
// The second character of the head token's text ('u' or not) decides
// whether this is the user-space variant.  The other variant is rejected
// unless `privileged` is set.  The register number is read with
// expect_number().
4237 expression
* parser::parse_target_register (const token
* t
)
4239 target_register
*treg
= new target_register
;
4242 treg
->userspace_p
= (t
->content
[1] == 'u');
4243 if (! treg
->userspace_p
&& ! privileged
)
4244 throw PARSE_ERROR (_("using @kregister operator not permitted; need stap -g"),
4245 false /* don't skip tokens for parse resumption */);
4247 expect_number(regno
);
4248 treg
->regno
= regno
;
4253 // Parse a @kderef or @uderef. Given head token has already been consumed.
// The second character of the head token's text ('u' or not) decides
// whether this is the user-space variant.  The other variant is rejected
// unless `privileged` is set.  The access size is read with
// expect_number() and the address with parse_expression().
4254 expression
* parser::parse_target_deref (const token
* t
)
4256 target_deref
*tderef
= new target_deref
;
4259 tderef
->userspace_p
= (t
->content
[1] == 'u');
4260 if (! tderef
->userspace_p
&& ! privileged
)
4261 throw PARSE_ERROR (_("using @kderef operator not permitted; need stap -g"),
4262 false /* don't skip tokens for parse resumption */);
4264 expect_number(size
);
4265 tderef
->size
= size
;
4267 tderef
->addr
= parse_expression();
// Look at the next token without consuming it and test whether it could
// start a target-symbol suffix: the operator "->" or "[", or an identifier
// that consists solely of '$' characters (a pretty-print marker).
// NOTE(review): the start of the returned expression is not visible in
// this excerpt.
4273 parser::peek_target_symbol_components ()
4275 const token
* t
= peek ();
4277 ((t
->type
== tok_operator
&& (t
->content
== "->" || t
->content
== "["))
4278 || (t
->type
== tok_identifier
&&
4279 t
->content
.find_first_not_of('$') == string::npos
));
// Parse the suffix components of a target symbol e and append them to
// e->components:
//  - trailing '$' characters on the symbol's own name ("$foo$") become a
//    pretty-print component;
//  - "->member" adds a member component, and trailing '$' characters on
//    the member are split off into a pretty-print component;
//  - "[expr]" adds an index component, using the numeric value directly
//    when the index expression is a literal_number;
//  - a separate all-'$' identifier token ("$foo[i] $") adds a pretty-print
//    component.
// After pretty-printing has been requested, a further "->" or "[" is an
// error.
// NOTE(review): the enclosing loop, the updates of `pprint` and some
// guards are not visible in this excerpt.
4283 parser::parse_target_symbol_components (target_symbol
* e
)
4285 bool pprint
= false;
4287 // check for pretty-print in the form $foo$
4288 string base
= e
->name
;
4289 size_t pprint_pos
= base
.find_last_not_of('$');
// True only when the name has trailing '$' characters after a non-'$'
// character that is not at position 0.
4290 if (0 < pprint_pos
&& pprint_pos
< base
.length() - 1)
4292 string pprint_val
= base
.substr(pprint_pos
+ 1);
4293 base
.erase(pprint_pos
+ 1);
4295 e
->components
.push_back (target_symbol::component(e
->tok
, pprint_val
, true));
4303 const token
* t
= next();
4304 interned_string member
;
4305 expect_ident_or_keyword (member
);
4307 // check for pretty-print in the form $foo->$ or $foo->bar$
4308 pprint_pos
= member
.find_last_not_of('$');
4309 interned_string pprint_val
;
// When the member is all '$', pprint_pos is npos and pprint_pos + 1 wraps
// to 0, so the whole member becomes the pretty-print suffix.
4310 if (pprint_pos
== string::npos
|| pprint_pos
< member
.length() - 1)
4312 pprint_val
= member
.substr(pprint_pos
+ 1);
4313 member
= member
.substr(0, pprint_pos
+ 1);
4317 if (!member
.empty())
4318 e
->components
.push_back (target_symbol::component(t
, member
));
4320 e
->components
.push_back (target_symbol::component(t
, pprint_val
, true));
4322 else if (peek_op ("["))
4324 const token
* t
= next();
4325 expression
* index
= parse_expression();
4326 literal_number
* ln
= dynamic_cast<literal_number
*>(index
);
4328 e
->components
.push_back (target_symbol::component(t
, ln
->value
));
4330 e
->components
.push_back (target_symbol::component(t
, index
));
4339 // check for pretty-print in the form $foo $
4340 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4341 const token
* t
= peek();
4342 if (t
!= NULL
&& t
->type
== tok_identifier
&&
4343 t
->content
.find_first_not_of('$') == string::npos
)
4346 e
->components
.push_back (target_symbol::component(t
, t
->content
, true));
4351 if (pprint
&& (peek_op ("->") || peek_op("[")))
4352 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
4355 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */