1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2015 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
5 // Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
7 // This file is part of systemtap, and is free software. You can
8 // redistribute it and/or modify it under the terms of the GNU General
9 // Public License (GPL); either version 2, or (at your option) any
17 #include "stringtable.h"
18 #include "unordered.h"
43 bool ate_comment
; // current token follows a comment
44 bool ate_whitespace
; // the most recent token followed whitespace
45 bool saw_tokens
; // the lexer found tokens (before preprocessing occurred)
46 bool check_compatible
; // whether to gate features on session.compatible
49 lexer (istream
&, const string
&, systemtap_session
&, bool);
50 void set_current_file (stapfile
* f
);
51 void set_current_token_chain (const token
* tok
);
52 inline bool has_version (const char* v
) const;
54 unordered_set
<interned_string
> keywords
;
55 static unordered_set
<string
> atwords
;
57 inline int input_get ();
58 inline int input_peek (unsigned n
=0);
59 void input_put (const string
&, const token
*);
61 string input_contents
; // NB: being a temporary, no need to interned_string optimize this object
62 const char *input_pointer
; // index into input_contents; NB: recompute if input_contents changed!
63 const char *input_end
;
64 unsigned cursor_suspend_count
;
65 unsigned cursor_suspend_line
;
66 unsigned cursor_suspend_column
;
68 unsigned cursor_column
;
69 systemtap_session
& session
;
70 stapfile
* current_file
;
71 const token
* current_token_chain
;
78 parser (systemtap_session
& s
, const string
& n
, istream
& i
, unsigned flags
=0);
82 probe
* parse_synthetic_probe (const token
* chain
);
83 stapfile
* parse_library_macros ();
94 struct pp1_activation
;
// A macro declaration that additionally carries a pointer to an activation
// record (parent_act).  is_closure() is true exactly when that pointer is
// non-null.
96 struct pp_macrodecl
: public macrodecl
{
97 pp1_activation
* parent_act
; // used for param bindings
98 virtual bool is_closure() { return parent_act
!= 0; }
// Default construction leaves parent_act null, i.e. not a closure.
99 pp_macrodecl () : macrodecl(), parent_act(0) { }
102 systemtap_session
& session
;
105 bool errs_as_warnings
;
108 parse_context context
;
110 // preprocessing subordinate, first pass (macros)
// One in-progress macro expansion: the macro being expanded, the read
// position within its body, and the parameter bindings looked up by name.
111 struct pp1_activation
{
113 unsigned cursor
; // position within macro body
// parameter name -> declaration bound to that name for this activation
114 map
<string
, pp_macrodecl
*> params
;
116 macrodecl
* curr_macro
;
// Stores tok and curr_macro as given; the cursor starts at 0.
118 pp1_activation (const token
* tok
, macrodecl
* curr_macro
)
119 : tok(tok
), cursor(0), curr_macro(curr_macro
) { }
123 map
<string
, macrodecl
*> pp1_namespace
;
124 vector
<pp1_activation
*> pp1_state
;
125 const token
* next_pp1 ();
126 const token
* scan_pp1 (bool ignore_macros
);
127 const token
* slurp_pp1_param (vector
<const token
*>& param
);
128 const token
* slurp_pp1_body (vector
<const token
*>& body
);
130 // preprocessing subordinate, final pass (conditionals)
131 vector
<pair
<const token
*, pp_state_t
> > pp_state
;
132 const token
* scan_pp ();
133 const token
* skip_pp ();
136 const token
* next ();
137 const token
* peek ();
139 // Advance past and throw away current token after peek () or next ().
142 const token
* systemtap_v_seen
;
143 const token
* last_t
; // the last value returned by peek() or next()
144 const token
* next_t
; // lookahead token
146 // expectations, these swallow the token
147 void expect_known (token_type tt
, string
const & expected
);
148 void expect_unknown (token_type tt
, interned_string
& target
);
149 void expect_unknown2 (token_type tt1
, token_type tt2
, interned_string
& target
);
151 // convenience forms, these also swallow the token
152 void expect_op (string
const & expected
);
153 void expect_kw (string
const & expected
);
154 void expect_number (int64_t & expected
);
155 void expect_ident_or_keyword (interned_string
& target
);
157 // convenience forms, which return true or false, these don't swallow token
158 bool peek_op (string
const & op
);
159 bool peek_kw (string
const & kw
);
161 // convenience forms, which return the token
162 const token
* expect_kw_token (string
const & expected
);
163 const token
* expect_ident_or_atword (interned_string
& target
);
165 void print_error (const parse_error
& pe
, bool errs_as_warnings
= false);
168 private: // nonterminals
169 void parse_probe (vector
<probe
*>&, vector
<probe_alias
*>&);
170 void parse_private (vector
<vardecl
*>&, vector
<probe
*>&,
171 string
const&, vector
<functiondecl
*>&);
172 void parse_global (vector
<vardecl
*>&, vector
<probe
*>&,
174 void do_parse_global (vector
<vardecl
*>&, vector
<probe
*>&,
175 string
const&, const token
*, bool);
176 void parse_functiondecl (vector
<functiondecl
*>&, string
const&);
177 void do_parse_functiondecl (vector
<functiondecl
*>&, const token
*,
178 string
const&, bool);
179 embeddedcode
* parse_embeddedcode ();
180 vector
<probe_point
*> parse_probe_points ();
181 vector
<probe_point
*> parse_components ();
182 vector
<probe_point
*> parse_component ();
183 literal_string
* consume_string_literals (const token
*);
184 literal_string
* parse_literal_string ();
185 literal
* parse_literal ();
186 block
* parse_stmt_block ();
187 try_block
* parse_try_block ();
188 statement
* parse_statement ();
189 if_statement
* parse_if_statement ();
190 for_loop
* parse_for_loop ();
191 for_loop
* parse_while_loop ();
192 foreach_loop
* parse_foreach_loop ();
193 expr_statement
* parse_expr_statement ();
194 return_statement
* parse_return_statement ();
195 delete_statement
* parse_delete_statement ();
196 next_statement
* parse_next_statement ();
197 break_statement
* parse_break_statement ();
198 continue_statement
* parse_continue_statement ();
199 indexable
* parse_indexable ();
200 const token
*parse_hist_op_or_bare_name (hist_op
*&hop
, interned_string
&name
);
201 target_symbol
*parse_target_symbol ();
202 cast_op
*parse_cast_op ();
203 atvar_op
*parse_atvar_op ();
204 expression
* parse_entry_op (const token
* t
);
205 expression
* parse_defined_op (const token
* t
);
206 expression
* parse_perf_op (const token
* t
);
207 expression
* parse_expression ();
208 expression
* parse_assignment ();
209 expression
* parse_ternary ();
210 expression
* parse_logical_or ();
211 expression
* parse_logical_and ();
212 expression
* parse_boolean_or ();
213 expression
* parse_boolean_xor ();
214 expression
* parse_boolean_and ();
215 expression
* parse_array_in ();
216 expression
* parse_comparison_or_regex_query ();
217 expression
* parse_shift ();
218 expression
* parse_concatenation ();
219 expression
* parse_additive ();
220 expression
* parse_multiplicative ();
221 expression
* parse_unary ();
222 expression
* parse_crement ();
223 expression
* parse_dwarf_value ();
224 expression
* parse_value ();
225 expression
* parse_symbol ();
227 bool peek_target_symbol_components ();
228 void parse_target_symbol_components (target_symbol
* e
);
232 // ------------------------------------------------------------------------
// Parse a script from an already-open stream: builds a parser over stream i,
// labelled with input name n, configured by flags.
// NOTE(review): the statement that runs the parser and returns its result is
// not visible in this excerpt.
235 parse (systemtap_session
& s
, const string
& n
, istream
& i
, unsigned flags
)
237 parser
p (s
, n
, i
, flags
);
// Parse a script from the file called `name`.  The file is opened for
// reading as an ifstream, and a parser is then built over it with `flags`.
// The diagnostic written to cerr distinguishes a file that exists but cannot
// be opened from one that is missing, using file_exists(name).
// NOTE(review): the open-failure test and the return statements are not
// visible in this excerpt.
242 parse (systemtap_session
& s
, const string
& name
, unsigned flags
)
244 ifstream
i(name
.c_str(), ios::in
);
247 cerr
<< (file_exists(name
)
248 ? _F("Input file '%s' can't be opened for reading.", name
.c_str())
249 : _F("Input file '%s' is missing.", name
.c_str()))
254 parser
p (s
, name
, i
, flags
);
// Parse the library macro file called `name`.  The file is opened for
// reading; the cerr diagnostic distinguishes "can't be opened" from
// "missing" via file_exists(name).  A parser is then built over the stream
// (no flags argument is passed) and the result of its
// parse_library_macros() member is returned.
// NOTE(review): the open-failure test is not visible in this excerpt.
259 parse_library_macros (systemtap_session
& s
, const string
& name
)
261 ifstream
i(name
.c_str(), ios::in
);
264 cerr
<< (file_exists(name
)
265 ? _F("Input file '%s' can't be opened for reading.", name
.c_str())
266 : _F("Input file '%s' is missing.", name
.c_str()))
271 parser
p (s
, name
, i
);
272 return p
.parse_library_macros ();
// Parse a synthetic probe from stream i.  The parser's input name is taken
// from tok's source file when tok is non-null, otherwise it is the literal
// "<synthetic>".  tok itself is passed on to parser::parse_synthetic_probe(),
// whose result is returned.
276 parse_synthetic_probe (systemtap_session
&s
, istream
& i
, const token
* tok
)
278 parser
p (s
, tok
? tok
->location
.file
->name
: "<synthetic>", i
);
279 return p
.parse_synthetic_probe (tok
);
282 // ------------------------------------------------------------------------
// Construct a parser for stream i, known by the input name n.
//  - the lexer member `input` receives the stream, the name, the session and
//    a bool that is true unless pf_no_compatible is set in flags;
//  - pf_squash_errors, pf_guru and pf_user_file initialise errs_as_warnings,
//    privileged and user_file respectively;
//  - context starts as con_unknown; systemtap_v_seen, last_t, next_t and
//    num_errors all start at 0.
284 parser::parser (systemtap_session
& s
, const string
&n
, istream
& i
, unsigned flags
):
285 session (s
), input_name (n
), input (i
, input_name
, s
, !(flags
& pf_no_compatible
)),
286 errs_as_warnings(flags
& pf_squash_errors
), privileged (flags
& pf_guru
),
287 user_file (flags
& pf_user_file
), context(con_unknown
), systemtap_v_seen(0),
288 last_t (0), next_t (0), num_errors (0)
// Human-readable name for a token type, as used in "expected ..." messages.
// A value not matched by any case below yields "unknown token".
297 tt2str(token_type tt
)
301 case tok_junk
: return "junk";
302 case tok_identifier
: return "identifier";
303 case tok_operator
: return "operator";
304 case tok_string
: return "string";
305 case tok_number
: return "number";
306 case tok_embedded
: return "embedded-code";
307 case tok_keyword
: return "keyword";
309 return "unknown token";
// Stream a source location.  The visible part writes the file name followed
// by ":".
// NOTE(review): the remainder of the output expression is not visible in
// this excerpt.
313 operator << (ostream
& o
, const source_loc
& loc
)
315 o
<< loc
.file
->name
<< ":"
// Stream a token.  The content-printing loop follows a test that excludes
// tok_embedded and tok_keyword tokens.
// NOTE(review): the lines between that test and the loop, and anything
// written before or after it, are not visible in this excerpt.
323 operator << (ostream
& o
, const token
& t
)
327 if (t
.type
!= tok_embedded
&& t
.type
!= tok_keyword
) // XXX: other types?
330 for (unsigned i
=0; i
<t
.content
.length(); i
++)
332 char c
= t
.content
[i
];
// characters rejected by isprint() are shown as '?'
333 o
<< (isprint (c
) ? c
: '?');
// Report a parse error through the session.  The token attached to the
// report is the error's own token when it has one, otherwise last_t.
// errs_as_warnings is forwarded unchanged to session.print_error().
// NOTE(review): any statements after the session call are not visible in
// this excerpt.
346 parser::print_error (const parse_error
&pe
, bool errs_as_warnings
)
348 const token
*tok
= pe
.tok
? pe
.tok
: last_t
;
349 session
.print_error(pe
, tok
, input_name
, errs_as_warnings
);
// Apply the comparison spelled by token op to lhs and rhs.
//   lhs, rhs - operands of any type supporting <=, >=, <, >, == and !=
//   op       - must be an operator token whose text is one of
//              "<=", ">=", "<", ">", "==", "!="
// Returns the result of the comparison.  Any other token raises a parse
// error ("expected comparison operator") located at op.  op is dereferenced
// without a null check.
356 template <typename OPERAND
>
357 bool eval_comparison (const OPERAND
& lhs
, const token
* op
, const OPERAND
& rhs
)
359 if (op
->type
== tok_operator
&& op
->content
== "<=")
360 { return lhs
<= rhs
; }
361 else if (op
->type
== tok_operator
&& op
->content
== ">=")
362 { return lhs
>= rhs
; }
363 else if (op
->type
== tok_operator
&& op
->content
== "<")
364 { return lhs
< rhs
; }
365 else if (op
->type
== tok_operator
&& op
->content
== ">")
366 { return lhs
> rhs
; }
367 else if (op
->type
== tok_operator
&& op
->content
== "==")
368 { return lhs
== rhs
; }
369 else if (op
->type
== tok_operator
&& op
->content
== "!=")
370 { return lhs
!= rhs
; }
372 throw PARSE_ERROR (_("expected comparison operator"), op
);
376 // Here, we perform on-the-fly preprocessing in two passes.
378 // First pass - macro declaration and expansion.
380 // The basic form of a declaration is @define SIGNATURE %( BODY %)
381 // where SIGNATURE is of the form macro_name (a, b, c, ...)
382 // and BODY can obtain the parameter contents as @a, @b, @c, ....
383 // Note that parameterless macros can also be declared.
385 // Macro definitions may not be nested.
386 // A macro is available textually after it has been defined.
388 // The basic form of a macro invocation
389 // for a parameterless macro is @macro_name,
390 // for a macro with parameters is @macro_name(param_1, param_2, ...).
392 // NB: this means that a parameterless macro @foo called as @foo(a, b, c)
393 // leaves its 'parameters' alone, rather than consuming them to result
394 // in a "too many parameters" error. This may be useful in the unusual
395 // case of wanting @foo to expand to the name of a function.
397 // Invocations of unknown macros are left unexpanded, to allow
398 // the continued use of constructs such as @cast, @var, etc.
// Destructor: walks every token pointer stored in body.
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// releases each token -- confirm against the full source.
400 macrodecl::~macrodecl ()
403 for (vector
<const token
*>::iterator it
= body
.begin();
404 it
!= body
.end(); it
++)
// Destructor: returns immediately when the macro being expanded is a
// closure; otherwise walks every entry of params.
// NOTE(review): the loop body and any statements before the closure test are
// not visible in this excerpt.
408 parser::pp1_activation::~pp1_activation ()
411 if (curr_macro
->is_closure()) return; // body is shared with an earlier declaration
412 for (map
<string
, pp_macrodecl
*>::iterator it
= params
.begin();
413 it
!= params
.end(); it
++)
417 // Grab a token from the current input source (main file or macro body):
421 if (pp1_state
.empty())
422 return input
.scan ();
424 // otherwise, we're inside a macro
425 pp1_activation
* act
= pp1_state
.back();
426 unsigned& cursor
= act
->cursor
;
427 if (cursor
< act
->curr_macro
->body
.size())
429 token
* t
= new token(*act
->curr_macro
->body
[cursor
]);
430 t
->chain
= new token(*act
->tok
); // mark chained token
435 return 0; // reached end of macro body
// Macro pass of the preprocessor.  Pulls tokens from next_pp1() and handles
// two constructs before ordinary tokens are passed on:
//   - "@define" declarations: the signature and body are read and the new
//     macrodecl is recorded in pp1_namespace under its name;
//   - "@name" invocations: the name is looked up in the current activation's
//     params, then pp1_namespace, then session.library_macros; on a match the
//     parameters are read and a new activation is pushed on pp1_state.
// When next_pp1() returns 0 inside a macro, the finished activation is popped
// and deleted.
//   ignore_macros - when true, "@define" tokens are not treated as
//                   declarations (PR18462).
// NOTE(review): the enclosing loop, several token-fetch statements and the
// return statements are not visible in this excerpt.
439 parser::scan_pp1 (bool ignore_macros
= false)
443 const token
* t
= next_pp1 ();
444 if (t
== 0) // EOF or end of macro body
446 if (pp1_state
.empty()) // actual EOF
449 // Exit macro and loop around to look for the next token.
450 pp1_activation
* act
= pp1_state
.back();
451 pp1_state
.pop_back(); delete act
;
456 // PR18462 don't catalog preprocessor-disabled macros
457 if (t
->type
== tok_operator
&& t
->content
== "@define" && !ignore_macros
)
459 if (!pp1_state
.empty())
460 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t
);
463 // handle macro definition
464 // (1) consume macro signature
466 if (! (t
&& t
->type
== tok_identifier
))
467 throw PARSE_ERROR (_("expected identifier"), t
);
468 string name
= t
->content
;
470 // check for redefinition of existing macro
471 if (pp1_namespace
.find(name
) != pp1_namespace
.end())
473 parse_error
er (ERR_SRC
, _F("attempt to redefine macro '@%s' in the same file", name
.c_str ()), t
);
475 // Also point to pp1_namespace[name]->tok, the site of
476 // the original definition:
477 er
.chain
= new PARSE_ERROR (_F("macro '@%s' first defined here",
478 name
.c_str()), pp1_namespace
[name
]->tok
);
482 // XXX: the above restriction was mostly necessary due to
483 // wanting to leave open the possibility of
484 // statically-scoped semantics in the future.
486 // XXX: this cascades into further parse errors as the
487 // parser tries to parse the remaining definition... (e.g.
488 // it can't tell that the macro body isn't a conditional,
489 // that the uses of parameters aren't nonexistent
491 if (name
== "define")
492 throw PARSE_ERROR (_("attempt to redefine '@define'"), t
);
493 if (input
.atwords
.count(name
))
494 session
.print_warning (_F("macro redefines built-in operator '@%s'", name
.c_str()), t
);
496 macrodecl
* decl
= (pp1_namespace
[name
] = new macrodecl
);
499 // determine if the macro takes parameters
500 bool saw_params
= false;
502 if (t
&& t
->type
== tok_operator
&& t
->content
== "(")
510 if (! (t
&& t
->type
== tok_identifier
))
511 throw PARSE_ERROR(_("expected identifier"), t
);
512 decl
->formal_args
.push_back(t
->content
);
516 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
520 else if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
528 throw PARSE_ERROR (_("expected ',' or ')'"), t
);
534 // (2) identify & consume macro body
535 if (! (t
&& t
->type
== tok_operator
&& t
->content
== "%("))
538 throw PARSE_ERROR (_("expected '%('"), t
);
540 throw PARSE_ERROR (_("expected '%(' or '('"), t
);
544 t
= slurp_pp1_body (decl
->body
);
546 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl
->tok
);
549 // Now loop around to look for a real token.
553 // (potential) macro invocation
554 if (t
->type
== tok_operator
&& t
->content
[0] == '@')
556 const string
& name
= t
->content
.substr(1); // strip initial '@'
558 // check if name refers to a real parameter or macro
560 pp1_activation
* act
= pp1_state
.empty() ? 0 : pp1_state
.back();
561 if (act
&& act
->params
.find(name
) != act
->params
.end())
562 decl
= act
->params
[name
];
563 else if (!(act
&& act
->curr_macro
->context
== ctx_library
)
564 && pp1_namespace
.find(name
) != pp1_namespace
.end())
565 decl
= pp1_namespace
[name
];
566 else if (session
.library_macros
.find(name
)
567 != session
.library_macros
.end())
568 decl
= session
.library_macros
[name
];
569 else // this is an ordinary @operator
572 // handle macro invocation, taking ownership of t
573 pp1_activation
*new_act
= new pp1_activation(t
, decl
);
574 unsigned num_params
= decl
->formal_args
.size();
576 // (1a) restore parameter invocation closure
577 if (num_params
== 0 && decl
->is_closure())
579 // NB: decl->parent_act is always safe since the
580 // parameter decl (if any) comes from an activation
581 // record which is deeper in the stack than new_act.
583 // decl is a macro parameter which must be evaluated in
584 // the context of the original point of invocation:
585 new_act
->params
= ((pp_macrodecl
*)decl
)->parent_act
->params
;
589 // (1b) consume macro parameters (if any)
593 // for simplicity, we do not allow macro constructs here
594 // -- if we did, we'd have to recursively call scan_pp1()
596 if (! (t
&& t
->type
== tok_operator
&& t
->content
== "("))
599 throw PARSE_ERROR (_NF
600 ("expected '(' in invocation of macro '@%s'"
601 " taking %d parameter",
602 "expected '(' in invocation of macro '@%s'"
603 " taking %d parameters",
604 num_params
, name
.c_str(), num_params
), t
);
607 // XXX perhaps parse/count the full number of params,
608 // so we can say "expected x, found y params" on error?
609 for (unsigned i
= 0; i
< num_params
; i
++)
613 // create parameter closure
614 string param_name
= decl
->formal_args
[i
];
615 pp_macrodecl
* p
= (new_act
->params
[param_name
]
617 p
->tok
= new token(*new_act
->tok
);
619 // NB: *new_act->tok points to invocation, act is NULL at top level
621 t
= slurp_pp1_param (p
->body
);
623 // check correct usage of ',' or ')'
624 if (t
== 0) // hit unexpected EOF or end of macro
626 // XXX could we pop the stack and continue parsing
627 // the invocation, allowing macros to construct new
628 // invocations in piecemeal fashion??
629 const token
* orig_t
= new token(*new_act
->tok
);
631 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t
);
633 if (t
->type
== tok_operator
&& t
->content
== ",")
635 if (i
+ 1 == num_params
)
638 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name
.c_str(), num_params
), t
);
641 else if (t
->type
== tok_operator
&& t
->content
== ")")
643 if (i
+ 1 != num_params
)
646 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name
.c_str(), num_params
), t
);
651 // XXX this is, incidentally, impossible
653 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t
);
659 // (2) set up macro expansion
661 pp1_state
.push_back (new_act
);
663 // Now loop around to look for a real token.
667 // Otherwise, we have an ordinary token.
672 // Consume a single macro invocation's parameters, heeding nesting
673 // brackets and stopping on an unbalanced ')' or an unbracketed ','
674 // (and returning the final separator token).
// `param` is the caller's vector for the parameter's tokens.
// NOTE(review): the token-fetch loop, the nesting updates and the statement
// that stores tokens into `param` are not visible in this excerpt.
676 parser::slurp_pp1_param (vector
<const token
*>& param
)
// bracket depth; the tests below treat ')' / ',' as terminators only at 0
679 unsigned nesting
= 0;
686 // [ needed in case macro parameter is used as prefix for array-deref operation
687 if (t
->type
== tok_operator
&& (t
->content
== "(" || t
->content
== "["))
689 else if (nesting
&& t
->type
== tok_operator
&& (t
->content
== ")" || t
->content
== "]"))
691 else if (!nesting
&& t
->type
== tok_operator
692 && (t
->content
== ")" || t
->content
== ","))
697 return t
; // report ")" or "," or NULL
701 // Consume a macro declaration's body, heeding nested %( %) brackets.
// `body` is the caller's vector for the body tokens.  The token returned is
// the closing "%)" found at nesting depth 0, or NULL.
// NOTE(review): the token-fetch loop, the nesting updates and the statement
// that stores tokens into `body` are not visible in this excerpt.
703 parser::slurp_pp1_body (vector
<const token
*>& body
)
// depth of nested "%(" brackets; "%)" only ends the body at depth 0
706 unsigned nesting
= 0;
713 if (t
->type
== tok_operator
&& t
->content
== "%(")
715 else if (nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
717 else if (!nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
722 return t
; // report final "%)" or NULL
725 // Used for parsing .stpm files.
// Steps visible below:
//   1. create a fresh stapfile and make it the lexer's current file;
//   2. run the preprocessor (scan_pp); the "unexpected token" error follows
//      this call;
//   3. check every macro in pp1_namespace against session.library_macros and
//      build a "duplicate definition" error chained to the first definition;
//   4. parse errors are caught and reported through print_error();
//   5. copy every macro into session.library_macros, tagging it ctx_library.
// NOTE(review): the tests guarding the throw in step 2 and the final copy
// loop, the throw of the duplicate error, and the return statements are not
// visible in this excerpt.
727 parser::parse_library_macros ()
729 stapfile
* f
= new stapfile
;
730 input
.set_current_file (f
);
734 const token
* t
= scan_pp ();
736 // Currently we only take objection to macro invocations if they
737 // produce a non-whitespace token after being expanded.
739 // XXX should we prevent macro invocations even if they expand to empty??
742 throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name
.c_str()), t
);
744 // We need to first check whether *any* of the macros are duplicates,
745 // then commit to including the entire file in the global namespace
747 for (map
<string
, macrodecl
*>::iterator it
= pp1_namespace
.begin();
748 it
!= pp1_namespace
.end(); it
++)
750 string name
= it
->first
;
752 if (session
.library_macros
.find(name
) != session
.library_macros
.end())
754 parse_error
er(ERR_SRC
, _F("duplicate definition of library macro '@%s'", name
.c_str()), it
->second
->tok
);
755 er
.chain
= new PARSE_ERROR (_F("macro '@%s' first defined here", name
.c_str()), session
.library_macros
[name
]->tok
);
765 catch (const parse_error
& pe
)
767 print_error (pe
, errs_as_warnings
);
772 // If no errors, include the entire file. Note how this is outside
773 // of the try-catch block -- no errors possible.
774 for (map
<string
, macrodecl
*>::iterator it
= pp1_namespace
.begin();
775 it
!= pp1_namespace
.end(); it
++)
777 string name
= it
->first
;
779 session
.library_macros
[name
] = it
->second
;
780 session
.library_macros
[name
]->context
= ctx_library
;
786 // Second pass - preprocessor conditional expansion.
788 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
789 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
790 // or: arch COMPARISON-OP "arch-string"
791 // or: systemtap_v COMPARISON-OP "version-string"
792 // or: systemtap_privilege COMPARISON-OP "privilege-string"
793 // or: CONFIG_foo COMPARISON-OP "config-string"
794 // or: CONFIG_foo COMPARISON-OP number
795 // or: CONFIG_foo COMPARISON-OP CONFIG_bar
796 // or: "string1" COMPARISON-OP "string2"
797 // or: number1 COMPARISON-OP number2
798 // The %: ELSE-TOKENS part is optional.
800 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
801 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
802 // e.g. %( CONFIG_foo %? "foo" %: "baz" %)
804 // Up to an entire %( ... %) expression is processed by a single call
805 // to this function. Tokens included by any nested conditions are
806 // enqueued in a private vector.
// Evaluate one preprocessor condition of the form  l op r.
//   s  - session supplying the values compared against (kernel release,
//        compatible version, privilege, guru_mode, architecture, runtime
//        mode, kernel_config)
//   l  - left operand: a recognised identifier (kernel_v, kernel_vr,
//        systemtap_v, systemtap_privilege, guru_mode, arch, runtime,
//        CONFIG_...), or a string or number literal
//   op - comparison operator token
//   r  - right operand; the type required depends on l
// Returns the truth value of the condition.  Throws a parse error when an
// operand has the wrong type, when the operator is not accepted for the given
// left operand, or when l is not a recognised form.
// Version comparisons use strverscmp(), or fnmatch() when the right-hand
// string contains a wildcard character ('*', '?' or '[').  The arch,
// runtime and CONFIG_-against-string forms also match with fnmatch().
// NOTE(review): a number of statements (declarations of `target` and `wc_ok`,
// the branch selecting fnmatch vs strverscmp, several return statements, any
// errno reset before strtoll) are not visible in this excerpt.
808 bool eval_pp_conditional (systemtap_session
& s
,
809 const token
* l
, const token
* op
, const token
* r
)
811 if (l
->type
== tok_identifier
&& (l
->content
== "kernel_v" ||
812 l
->content
== "kernel_vr" ||
813 l
->content
== "systemtap_v"))
815 if (! (r
->type
== tok_string
))
816 throw PARSE_ERROR (_("expected string literal"), r
);
818 string target_kernel_vr
= s
.kernel_release
;
819 string target_kernel_v
= s
.kernel_base_release
;
822 if (l
->content
== "kernel_v") target
= target_kernel_v
;
823 else if (l
->content
== "kernel_vr") target
= target_kernel_vr
;
824 else if (l
->content
== "systemtap_v") target
= s
.compatible
;
827 string query
= r
->content
;
828 bool rhs_wildcard
= (strpbrk (query
.c_str(), "*?[") != 0);
830 // collect acceptable strverscmp results.
831 int rvc_ok1
, rvc_ok2
;
833 if (op
->type
== tok_operator
&& op
->content
== "<=")
834 { rvc_ok1
= -1; rvc_ok2
= 0; }
835 else if (op
->type
== tok_operator
&& op
->content
== ">=")
836 { rvc_ok1
= 1; rvc_ok2
= 0; }
837 else if (op
->type
== tok_operator
&& op
->content
== "<")
838 { rvc_ok1
= -1; rvc_ok2
= -1; }
839 else if (op
->type
== tok_operator
&& op
->content
== ">")
840 { rvc_ok1
= 1; rvc_ok2
= 1; }
841 else if (op
->type
== tok_operator
&& op
->content
== "==")
842 { rvc_ok1
= 0; rvc_ok2
= 0; wc_ok
= true; }
843 else if (op
->type
== tok_operator
&& op
->content
== "!=")
844 { rvc_ok1
= -1; rvc_ok2
= 1; wc_ok
= true; }
846 throw PARSE_ERROR (_("expected comparison operator"), op
);
848 if ((!wc_ok
) && rhs_wildcard
)
849 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op
);
853 int rvc_result
= fnmatch (query
.c_str(), target
.c_str(),
854 FNM_NOESCAPE
); // spooky
855 bool badness
= (rvc_result
== 0) ^ (op
->content
== "==");
860 int rvc_result
= strverscmp (target
.c_str(), query
.c_str());
861 // normalize rvc_result
862 if (rvc_result
< 0) rvc_result
= -1;
863 if (rvc_result
> 0) rvc_result
= 1;
864 return (rvc_result
== rvc_ok1
|| rvc_result
== rvc_ok2
);
867 else if (l
->type
== tok_identifier
&& l
->content
== "systemtap_privilege")
869 string target_privilege
=
870 pr_contains(s
.privilege
, pr_stapdev
) ? "stapdev"
871 : pr_contains(s
.privilege
, pr_stapsys
) ? "stapsys"
872 : pr_contains(s
.privilege
, pr_stapusr
) ? "stapusr"
873 : "none"; /* should be impossible -- s.privilege always one of above */
874 assert(target_privilege
!= "none");
876 if (! (r
->type
== tok_string
))
877 throw PARSE_ERROR (_("expected string literal"), r
);
878 string query_privilege
= r
->content
;
880 bool nomatch
= (target_privilege
!= query_privilege
);
883 if (op
->type
== tok_operator
&& op
->content
== "==")
885 else if (op
->type
== tok_operator
&& op
->content
== "!=")
888 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
889 /* XXX perhaps allow <= >= and similar comparisons */
893 else if (l
->type
== tok_identifier
&& l
->content
== "guru_mode")
895 if (! (r
->type
== tok_number
))
896 throw PARSE_ERROR (_("expected number"), r
);
897 int64_t lhs
= (int64_t) s
.guru_mode
;
898 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
899 if (!((rhs
== 0)||(rhs
== 1)))
900 throw PARSE_ERROR (_("expected 0 or 1"), op
);
901 if (!((op
->type
== tok_operator
&& op
->content
== "==") ||
902 (op
->type
== tok_operator
&& op
->content
== "!=")))
903 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
905 return eval_comparison (lhs
, op
, rhs
);
907 else if (l
->type
== tok_identifier
&& l
->content
== "arch")
909 string target_architecture
= s
.architecture
;
910 if (! (r
->type
== tok_string
))
911 throw PARSE_ERROR (_("expected string literal"), r
);
912 string query_architecture
= r
->content
;
914 int nomatch
= fnmatch (query_architecture
.c_str(),
915 target_architecture
.c_str(),
916 FNM_NOESCAPE
); // still spooky
919 if (op
->type
== tok_operator
&& op
->content
== "==")
921 else if (op
->type
== tok_operator
&& op
->content
== "!=")
924 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
928 else if (l
->type
== tok_identifier
&& l
->content
== "runtime")
930 if (! (r
->type
== tok_string
))
931 throw PARSE_ERROR (_("expected string literal"), r
);
933 string query_runtime
= r
->content
;
934 string target_runtime
;
936 target_runtime
= (s
.runtime_mode
== systemtap_session::dyninst_runtime
937 ? "dyninst" : "kernel");
938 int nomatch
= fnmatch (query_runtime
.c_str(),
939 target_runtime
.c_str(),
940 FNM_NOESCAPE
); // still spooky
943 if (op
->type
== tok_operator
&& op
->content
== "==")
945 else if (op
->type
== tok_operator
&& op
->content
== "!=")
948 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
952 else if (l
->type
== tok_identifier
&& l
->content
.starts_with("CONFIG_"))
954 if (r
->type
== tok_string
)
956 string lhs
= s
.kernel_config
[l
->content
]; // may be empty
957 string rhs
= r
->content
;
959 int nomatch
= fnmatch (rhs
.c_str(), lhs
.c_str(), FNM_NOESCAPE
); // still spooky
962 if (op
->type
== tok_operator
&& op
->content
== "==")
964 else if (op
->type
== tok_operator
&& op
->content
== "!=")
967 throw PARSE_ERROR (_("expected '==' or '!='"), op
);
971 else if (r
->type
== tok_number
)
973 const string
& lhs_string
= s
.kernel_config
[l
->content
];
974 const char* startp
= lhs_string
.c_str ();
975 char* endp
= (char*) startp
;
977 int64_t lhs
= (int64_t) strtoll (startp
, & endp
, 0);
978 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0')
979 throw PARSE_ERROR ("Config option value not a number", l
);
981 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
982 return eval_comparison (lhs
, op
, rhs
);
984 else if (r
->type
== tok_identifier
985 && r
->content
.starts_with( "CONFIG_"))
987 // First try to convert both to numbers,
988 // otherwise treat both as strings.
989 const string
& lhs_string
= s
.kernel_config
[l
->content
];
990 const string
& rhs_string
= s
.kernel_config
[r
->content
];
991 const char* startp
= lhs_string
.c_str ();
992 char* endp
= (char*) startp
;
994 int64_t val
= (int64_t) strtoll (startp
, & endp
, 0);
995 if (errno
!= ERANGE
&& errno
!= EINVAL
&& *endp
== '\0')
998 startp
= rhs_string
.c_str ();
999 endp
= (char*) startp
;
1001 int64_t rhs
= (int64_t) strtoll (startp
, & endp
, 0);
1002 if (errno
!= ERANGE
&& errno
!= EINVAL
&& *endp
== '\0')
1003 return eval_comparison (lhs
, op
, rhs
);
1006 return eval_comparison (lhs_string
, op
, rhs_string
);
1009 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r
);
1011 else if (l
->type
== tok_string
&& r
->type
== tok_string
)
1013 string lhs
= l
->content
;
1014 string rhs
= r
->content
;
1015 return eval_comparison (lhs
, op
, rhs
);
1016 // NB: no wildcarding option here
1018 else if (l
->type
== tok_number
&& r
->type
== tok_number
)
1020 int64_t lhs
= lex_cast
<int64_t>(l
->content
);
1021 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
1022 return eval_comparison (lhs
, op
, rhs
);
1023 // NB: no wildcarding option here
1025 else if (l
->type
== tok_string
&& r
->type
== tok_number
1026 && op
->type
== tok_operator
)
1027 throw PARSE_ERROR (_("expected string literal as right value"), r
);
1028 else if (l
->type
== tok_number
&& r
->type
== tok_string
1029 && op
->type
== tok_operator
)
1030 throw PARSE_ERROR (_("expected number literal as right value"), r
);
1033 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
1034 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1035 " comparison between strings or integers"), l
);
1039 // Only tokens corresponding to the TRUE statement must be expanded
1045 pp_state_t pp
= PP_NONE
;
1046 if (!pp_state
.empty())
1047 pp
= pp_state
.back().second
;
1050 if (pp
== PP_SKIP_THEN
|| pp
== PP_SKIP_ELSE
)
1059 t
= pp_state
.back().first
;
1060 pp_state
.pop_back(); // so skip_some doesn't keep trying to close this
1061 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
1062 throw PARSE_ERROR (_("incomplete conditional at end of file"), t
);
1067 // misplaced preprocessor "then"
1068 if (t
->type
== tok_operator
&& t
->content
== "%?")
1069 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1071 // preprocessor "else"
1072 if (t
->type
== tok_operator
&& t
->content
== "%:")
1075 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1076 if (pp
== PP_KEEP_ELSE
|| pp
== PP_SKIP_ELSE
)
1077 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t
);
1078 // XXX: here and elsewhere, error cascades might be avoided
1079 // by dropping tokens until we reach the closing %)
1081 pp_state
.back().second
= (pp
== PP_KEEP_THEN
) ?
1082 PP_SKIP_ELSE
: PP_KEEP_ELSE
;
1087 // preprocessor close
1088 if (t
->type
== tok_operator
&& t
->content
== "%)")
1091 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t
);
1092 delete pp_state
.back().first
;
1093 delete t
; //this is the closing bracket
1094 pp_state
.pop_back();
1098 if (! (t
->type
== tok_operator
&& t
->content
== "%(")) // ordinary token
1101 // We have a %( - it's time to throw a preprocessing party!
1103 bool result
= false;
1104 bool and_result
= true;
1105 const token
*n
= NULL
;
1107 const token
*l
, *op
, *r
;
1111 if (l
== 0 || op
== 0 || r
== 0)
1112 throw PARSE_ERROR (_("incomplete condition after '%('"), t
);
1113 // NB: consider generalizing to consume all tokens until %?, and
1114 // passing that as a vector to an evaluator.
1116 // Do not evaluate the condition if we haven't expanded everything.
1117 // This may occur when having several recursive conditionals.
1118 and_result
&= eval_pp_conditional (session
, l
, op
, r
);
1119 if(l
->content
=="systemtap_v")
1130 if (n
&& n
->type
== tok_operator
&& n
->content
== "&&")
1132 result
|= and_result
;
1134 if (! (n
&& n
->type
== tok_operator
&& n
->content
== "||"))
1139 clog << "PP eval (" << *t << ") == " << result << endl;
1143 if (! (m
&& m
->type
== tok_operator
&& m
->content
== "%?"))
1144 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t
);
1147 pp
= result
? PP_KEEP_THEN
: PP_SKIP_THEN
;
1148 pp_state
.push_back (make_pair (t
, pp
));
1150 // Now loop around to look for a real token.
1155 // Skip over tokens and any errors, heeding
1156 // only nested preprocessor starts and ends.
1161 unsigned nesting
= 0;
1166 t
= scan_pp1 (true);
1168 catch (const parse_error
&e
)
1174 if (t
->type
== tok_operator
&& t
->content
== "%(")
1176 else if (nesting
&& t
->type
== tok_operator
&& t
->content
== "%)")
1178 else if (!nesting
&& t
->type
== tok_operator
&&
1179 (t
->content
== "%:" || t
->content
== "%?" || t
->content
== "%)"))
1192 next_t
= scan_pp ();
1194 throw PARSE_ERROR (_("unexpected end-of-file"));
1197 // advance by zeroing next_t
1207 next_t
= scan_pp ();
1209 // don't advance by zeroing next_t
1218 // can only swallow the token most recently returned by peek() or next().
1219 assert (last_t
!= 0);
1221 // advance by zeroing next_t
1222 last_t
= next_t
= 0;
// True when t is non-null, has type tt, and its text equals `expected`.
// Safe to call with a null token: the pointer is tested before it is used.
1227 tok_is(token
const * t
, token_type tt
, string
const & expected
)
1229 return t
&& t
->type
== tt
&& t
->content
== expected
;
1234 parser::expect_known (token_type tt
, string
const & expected
)
1236 const token
*t
= next();
1237 if (! (t
&& t
->type
== tt
&& t
->content
== expected
))
1238 throw PARSE_ERROR (_F("expected '%s'", expected
.c_str()));
1239 swallow (); // We are done with it, content was copied.
1244 parser::expect_unknown (token_type tt
, interned_string
& target
)
1246 const token
*t
= next();
1247 if (!(t
&& t
->type
== tt
))
1248 throw PARSE_ERROR (_("expected ") + tt2str(tt
));
1249 target
= t
->content
;
1250 swallow (); // We are done with it, content was copied.
1255 parser::expect_unknown2 (token_type tt1
, token_type tt2
, interned_string
& target
)
1257 const token
*t
= next();
1258 if (!(t
&& (t
->type
== tt1
|| t
->type
== tt2
)))
1259 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1
).c_str(), tt2str(tt2
).c_str()));
1260 target
= t
->content
;
1261 swallow (); // We are done with it, content was copied.
1266 parser::expect_op (string
const & expected
)
1268 expect_known (tok_operator
, expected
);
1273 parser::expect_kw (string
const & expected
)
1275 expect_known (tok_keyword
, expected
);
1279 parser::expect_kw_token (string
const & expected
)
1281 const token
*t
= next();
1282 if (! (t
&& t
->type
== tok_keyword
&& t
->content
== expected
))
1283 throw PARSE_ERROR (_F("expected '%s'", expected
.c_str()));
1288 parser::expect_number (int64_t & value
)
1291 const token
*t
= next();
1292 if (t
->type
== tok_operator
&& t
->content
== "-")
1298 if (!(t
&& t
->type
== tok_number
))
1299 throw PARSE_ERROR (_("expected number"));
1301 const string
& s
= t
->content
;
1302 const char* startp
= s
.c_str ();
1303 char* endp
= (char*) startp
;
1305 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1306 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1307 // since the lexer only gives us positive digit strings, but we'll
1308 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1310 value
= (int64_t) strtoull (startp
, & endp
, 0);
1311 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
1312 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
1313 || (unsigned long long) value
> 18446744073709551615ULL
1314 || value
< -9223372036854775807LL-1)
1315 throw PARSE_ERROR (_("number invalid or out of range"));
1320 swallow (); // We are done with it, content was parsed and copied into value.
1325 parser::expect_ident_or_atword (interned_string
& target
)
1327 const token
*t
= next();
1329 // accept identifiers and operators beginning in '@':
1330 if (!t
|| (t
->type
!= tok_identifier
1331 && (t
->type
!= tok_operator
|| t
->content
[0] != '@')))
1332 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1333 // so the message is accurate, but keep an eye out in the future:
1334 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier
).c_str()));
1336 target
= t
->content
;
1342 parser::expect_ident_or_keyword (interned_string
& target
)
1344 expect_unknown2 (tok_identifier
, tok_keyword
, target
);
1349 parser::peek_op (string
const & op
)
1351 return tok_is (peek(), tok_operator
, op
);
1356 parser::peek_kw (string
const & kw
)
1358 return tok_is (peek(), tok_identifier
, kw
);
1363 lexer::lexer (istream
& input
, const string
& in
, systemtap_session
& s
, bool cc
):
1364 ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc
),
1365 input_name (in
), input_pointer (0), input_end (0), cursor_suspend_count(0),
1366 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1367 cursor_column (1), session(s
), current_file (0), current_token_chain (0)
1369 getline(input
, input_contents
, '\0');
1371 input_pointer
= input_contents
.data();
1372 input_end
= input_contents
.data() + input_contents
.size();
1374 if (keywords
.empty())
1376 // NB: adding new keywords is highly disruptive to the language,
1377 // in particular to existing scripts that could be suddenly
1378 // broken. If done at all, it has to be s.compatible-sensitive,
1379 // and broadly advertised.
1380 keywords
.insert("probe");
1381 keywords
.insert("global");
1382 if (has_version("3.0"))
1383 keywords
.insert("private");
1384 keywords
.insert("function");
1385 keywords
.insert("if");
1386 keywords
.insert("else");
1387 keywords
.insert("for");
1388 keywords
.insert("foreach");
1389 keywords
.insert("in");
1390 keywords
.insert("limit");
1391 keywords
.insert("return");
1392 keywords
.insert("delete");
1393 keywords
.insert("while");
1394 keywords
.insert("break");
1395 keywords
.insert("continue");
1396 keywords
.insert("next");
1397 keywords
.insert("string");
1398 keywords
.insert("long");
1399 keywords
.insert("try");
1400 keywords
.insert("catch");
1403 if (atwords
.empty())
1405 // NB: adding new @words is mildly disruptive to existing
1406 // scripts that define macros with the same name, but not
1407 // really. The user will merely receive a warning that they are
1408 // redefining an existing operator.
1410 // These are inserted without the actual '@', so we can directly check
1411 // proposed macro names without building a string with that prefix.
1412 atwords
.insert("cast");
1413 atwords
.insert("defined");
1414 atwords
.insert("entry");
1415 atwords
.insert("perf");
1416 atwords
.insert("var");
1417 atwords
.insert("avg");
1418 atwords
.insert("count");
1419 atwords
.insert("sum");
1420 atwords
.insert("min");
1421 atwords
.insert("max");
1422 atwords
.insert("hist_linear");
1423 atwords
.insert("hist_log");
1427 unordered_set
<string
> lexer::atwords
;
1430 lexer::set_current_file (stapfile
* f
)
1435 f
->file_contents
= input_contents
;
1436 f
->name
= input_name
;
1441 lexer::set_current_token_chain (const token
* tok
)
1443 current_token_chain
= tok
;
1447 lexer::input_peek (unsigned n
)
1449 if (input_pointer
+ n
>= input_end
)
1451 return (unsigned char)*(input_pointer
+ n
);
1456 lexer::has_version (const char* v
) const
1458 return check_compatible
1459 ? strverscmp(session
.compatible
.c_str(), v
) >= 0
1466 int c
= input_peek();
1467 if (c
< 0) return c
; // EOF
1471 if (cursor_suspend_count
)
1473 // Track effect of input_put: preserve previous cursor/line_column
1474 // until all of its characters are consumed.
1475 if (--cursor_suspend_count
== 0)
1477 cursor_line
= cursor_suspend_line
;
1478 cursor_column
= cursor_suspend_column
;
1483 // update source cursor
1493 // clog << "[" << (char)c << "]";
1499 lexer::input_put (const string
& chars
, const token
* t
)
1501 size_t pos
= input_pointer
- input_contents
.data();
1502 // clog << "[put:" << chars << " @" << pos << "]";
1503 input_contents
.insert (pos
, chars
);
1504 cursor_suspend_count
+= chars
.size();
1505 cursor_suspend_line
= cursor_line
;
1506 cursor_suspend_column
= cursor_column
;
1507 cursor_line
= t
->location
.line
;
1508 cursor_column
= t
->location
.column
;
1509 input_pointer
= input_contents
.data() + pos
;
1510 input_end
= input_contents
.data() + input_contents
.size();
1517 ate_comment
= false; // reset for each new token
1518 ate_whitespace
= false; // reset for each new token
1520 // XXX be very sure to restore old_saw_tokens if we return without a token:
1521 bool old_saw_tokens
= saw_tokens
;
1524 token
* n
= new token
;
1525 string token_str
; // accumulate here instead of by incremental interning
1526 n
->location
.file
= current_file
;
1527 n
->chain
= current_token_chain
;
1530 bool suspended
= (cursor_suspend_count
> 0);
1531 n
->location
.line
= cursor_line
;
1532 n
->location
.column
= cursor_column
;
1534 int c
= input_get();
1535 // clog << "{" << (char)c << (char)c2 << "}";
1539 saw_tokens
= old_saw_tokens
;
1545 ate_whitespace
= true;
1549 int c2
= input_peek ();
1551 // Paste command line arguments as character streams into
1552 // the beginning of a token. $1..$999 go through as raw
1553 // characters; @1..@999 are quoted/escaped as strings.
1554 // $# and @# expand to the number of arguments, similarly
1556 if ((c
== '$' || c
== '@') && (c2
== '#'))
1558 token_str
.push_back (c
);
1559 token_str
.push_back (c2
);
1560 input_get(); // swallow '#'
1564 n
->make_junk(tok_junk_nested_arg
);
1567 size_t num_args
= session
.args
.size ();
1568 input_put ((c
== '$') ? lex_cast (num_args
) : lex_cast_qstring (num_args
), n
);
1572 else if ((c
== '$' || c
== '@') && (isdigit (c2
)))
1575 token_str
.push_back (c
);
1579 token_str
.push_back (c2
);
1580 idx
= (idx
* 10) + (c2
- '0');
1584 idx
<= session
.args
.size()); // prevent overflow
1587 n
->make_junk(tok_junk_nested_arg
);
1591 idx
-1 >= session
.args
.size())
1593 n
->make_junk(tok_junk_invalid_arg
);
1596 const string
& arg
= session
.args
[idx
-1];
1597 input_put ((c
== '$') ? arg
: lex_cast_qstring (arg
), n
);
1602 else if (isalpha (c
) || c
== '$' || c
== '@' || c
== '_')
1604 token_str
= (char) c
;
1605 while (isalnum (c2
) || c2
== '_' || c2
== '$')
1608 token_str
.push_back (c2
);
1611 n
->content
= token_str
;
1613 if (n
->content
[0] == '@')
1614 // makes it easier to detect illegal use of @words:
1615 n
->type
= tok_operator
;
1616 else if (keywords
.count(n
->content
))
1617 n
->type
= tok_keyword
;
1619 n
->type
= tok_identifier
;
1624 else if (isdigit (c
)) // positive literal
1626 n
->type
= tok_number
;
1627 token_str
= (char) c
;
1629 while (isalnum (c2
))
1631 // NB: isalnum is very permissive. We rely on strtol, called in
1632 // parser::parse_literal below, to confirm that the number string
1633 // is correctly formatted and in range.
1636 token_str
.push_back (c2
);
1640 n
->content
= token_str
;
1646 n
->type
= tok_string
;
1651 if (c
< 0 || c
== '\n')
1653 n
->make_junk(tok_junk_unclosed_quote
);
1656 if (c
== '\"') // closing double-quotes
1658 else if (c
== '\\') // see also input_put
1664 if (!has_version("2.3"))
1674 case '0' ... '7': // NB: need only match the first digit
1676 // Pass these escapes through to the string value
1677 // being parsed; it will be emitted into a C literal.
1678 // XXX: PR13371: perhaps we should evaluate them here
1679 // (and re-quote them during translate.cxx emission).
1680 token_str
.push_back ('\\');
1683 default: the_default
:
1684 token_str
.push_back (c
);
1689 token_str
.push_back (c
);
1691 n
->content
= token_str
;
1695 else if (ispunct (c
))
1697 int c3
= input_peek (1);
1699 // NB: if we were to recognize negative numeric literals here,
1700 // we'd introduce another grammar ambiguity:
1701 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1702 // instead of tok_number(1) tok_operator('-') tok_number(1)
1704 if (c
== '#') // shell comment
1706 unsigned this_line
= cursor_line
;
1707 do { c
= input_get (); }
1708 while (c
>= 0 && cursor_line
== this_line
);
1710 ate_whitespace
= true;
1713 else if ((c
== '/' && c2
== '/')) // C++ comment
1715 unsigned this_line
= cursor_line
;
1716 do { c
= input_get (); }
1717 while (c
>= 0 && cursor_line
== this_line
);
1719 ate_whitespace
= true;
1722 else if (c
== '/' && c2
== '*') // C comment
1724 (void) input_get (); // swallow '*' already in c2
1729 if (c
== '*' && c2
== '/')
1735 ate_whitespace
= true;
1738 else if (c
== '%' && c2
== '{') // embedded code
1740 n
->type
= tok_embedded
;
1741 (void) input_get (); // swallow '{' already in c2
1746 if (c
== '%' && c2
== '}')
1748 n
->content
= token_str
;
1751 if (c
== '}' && c2
== '%') // possible typo
1752 session
.print_warning (_("possible erroneous closing '}%', use '%}'?"), n
);
1753 token_str
.push_back (c
);
1758 n
->make_junk(tok_junk_unclosed_embedded
);
1762 // We're committed to recognizing at least the first character
1764 n
->type
= tok_operator
;
1765 token_str
= (char) c
;
1767 // match all valid operators, in decreasing size order
1768 if ((c
== '<' && c2
== '<' && c3
== '<') ||
1769 (c
== '<' && c2
== '<' && c3
== '=') ||
1770 (c
== '>' && c2
== '>' && c3
== '='))
1772 token_str
.push_back (c2
);
1773 token_str
.push_back (c3
);
1777 else if ((c
== '=' && c2
== '=') ||
1778 (c
== '!' && c2
== '=') ||
1779 (c
== '<' && c2
== '=') ||
1780 (c
== '>' && c2
== '=') ||
1781 (c
== '=' && c2
== '~') ||
1782 (c
== '!' && c2
== '~') ||
1783 (c
== '+' && c2
== '=') ||
1784 (c
== '-' && c2
== '=') ||
1785 (c
== '*' && c2
== '=') ||
1786 (c
== '/' && c2
== '=') ||
1787 (c
== '%' && c2
== '=') ||
1788 (c
== '&' && c2
== '=') ||
1789 (c
== '^' && c2
== '=') ||
1790 (c
== '|' && c2
== '=') ||
1791 (c
== '.' && c2
== '=') ||
1792 (c
== '&' && c2
== '&') ||
1793 (c
== '|' && c2
== '|') ||
1794 (c
== '+' && c2
== '+') ||
1795 (c
== '-' && c2
== '-') ||
1796 (c
== '-' && c2
== '>') ||
1797 (c
== '<' && c2
== '<') ||
1798 (c
== '>' && c2
== '>') ||
1799 // preprocessor tokens
1800 (c
== '%' && c2
== '(') ||
1801 (c
== '%' && c2
== '?') ||
1802 (c
== '%' && c2
== ':') ||
1803 (c
== '%' && c2
== ')'))
1805 token_str
.push_back (c2
);
1806 input_get (); // swallow other character
1809 n
->content
= token_str
;
1817 s
<< "\\x" << hex
<< setw(2) << setfill('0') << c
;
1818 n
->content
= s
.str();
1819 // signal parser to emit "expected X, found junk" type error
1820 n
->make_junk(tok_junk_unknown
);
1825 // ------------------------------------------------------------------------
1828 token::make_junk (token_junk_type junk
)
1834 // ------------------------------------------------------------------------
1837 token::junk_message(systemtap_session
& session
) const
1841 case tok_junk_nested_arg
:
1842 return _("invalid nested substitution of command line arguments");
1844 case tok_junk_invalid_arg
:
1845 return _F("command line argument out of range [1-%lu]",
1846 (unsigned long) session
.args
.size());
1848 case tok_junk_unclosed_quote
:
1849 return _("Could not find matching closing quote");
1851 case tok_junk_unclosed_embedded
:
1852 return _("Could not find matching '%}' to close embedded function block");
1855 return _("unknown junk token");
1859 // ------------------------------------------------------------------------
1864 stapfile
* f
= new stapfile
;
1865 input
.set_current_file (f
);
1873 systemtap_v_seen
= 0;
1874 const token
* t
= peek ();
1875 if (! t
) // nice clean EOF, modulo any preprocessing that occurred
1879 if (t
->type
== tok_keyword
&& t
->content
== "probe")
1881 context
= con_probe
;
1882 parse_probe (f
->probes
, f
->aliases
);
1884 else if (t
->type
== tok_keyword
&& t
->content
== "private")
1886 context
= con_unknown
;
1887 parse_private (f
->globals
, f
->probes
, f
->name
, f
->functions
);
1889 else if (t
->type
== tok_keyword
&& t
->content
== "global")
1891 context
= con_global
;
1892 parse_global (f
->globals
, f
->probes
, f
->name
);
1894 else if (t
->type
== tok_keyword
&& t
->content
== "function")
1896 context
= con_function
;
1897 parse_functiondecl (f
->functions
, f
->name
);
1899 else if (t
->type
== tok_embedded
)
1901 context
= con_embedded
;
1902 f
->embeds
.push_back (parse_embeddedcode ());
1906 context
= con_unknown
;
1907 throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
1910 catch (parse_error
& pe
)
1912 print_error (pe
, errs_as_warnings
);
1914 // XXX: do we want tok_junk to be able to force skip_some behaviour?
1915 if (pe
.skip_some
) // for recovery
1916 // Quietly swallow all tokens until the next keyword we can start parsing from.
1921 const token
* t
= peek ();
1924 if (t
->type
== tok_keyword
&& t
->content
== "probe") break;
1925 else if (t
->type
== tok_keyword
&& t
->content
== "private") break;
1926 else if (t
->type
== tok_keyword
&& t
->content
== "global") break;
1927 else if (t
->type
== tok_keyword
&& t
->content
== "function") break;
1928 else if (t
->type
== tok_embedded
) break;
1929 swallow (); // swallow it
1932 catch (parse_error
& pe2
)
1934 // parse error during recovery ... ugh
1942 // vary message depending on whether file was *actually* empty:
1943 cerr
<< (input
.saw_tokens
1944 ? _F("Input file '%s' is empty after preprocessing.", input_name
.c_str())
1945 : _F("Input file '%s' is empty.", input_name
.c_str()))
1950 else if (num_errors
> 0)
1952 cerr
<< _NF("%d parse error.", "%d parse errors.", num_errors
, num_errors
) << endl
;
1957 input
.set_current_file(0);
1963 parser::parse_synthetic_probe (const token
* chain
)
1966 stapfile
* f
= new stapfile
;
1967 f
->synthetic
= true;
1968 input
.set_current_file (f
);
1969 input
.set_current_token_chain (chain
);
1973 context
= con_probe
;
1974 parse_probe (f
->probes
, f
->aliases
);
1976 if (f
->probes
.size() != 1 || !f
->aliases
.empty())
1977 throw PARSE_ERROR (_("expected a single synthetic probe"));
1980 catch (parse_error
& pe
)
1982 print_error (pe
, errs_as_warnings
);
1985 // TODO check for unparsed tokens?
1987 input
.set_current_file(0);
1988 input
.set_current_token_chain(0);
1994 parser::parse_probe (vector
<probe
*> & probe_ret
,
1995 vector
<probe_alias
*> & alias_ret
)
1997 const token
* t0
= next ();
1998 if (! (t0
->type
== tok_keyword
&& t0
->content
== "probe"))
1999 throw PARSE_ERROR (_("expected 'probe'"));
2001 vector
<probe_point
*> aliases
;
2002 vector
<probe_point
*> locations
;
2004 int epilogue_alias
= 0;
2008 vector
<probe_point
*> pps
= parse_probe_points();
2010 const token
* t
= peek ();
2011 if (pps
.size() == 1 && t
2012 && t
->type
== tok_operator
&& t
->content
== "=")
2014 if (pps
[0]->optional
|| pps
[0]->sufficient
)
2015 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps
[0]->components
.front()->tok
);
2016 aliases
.push_back(pps
[0]);
2020 else if (pps
.size() == 1 && t
2021 && t
->type
== tok_operator
&& t
->content
== "+=")
2023 if (pps
[0]->optional
|| pps
[0]->sufficient
)
2024 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps
[0]->components
.front()->tok
);
2025 aliases
.push_back(pps
[0]);
2030 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
2032 locations
.insert(locations
.end(), pps
.begin(), pps
.end());
2036 throw PARSE_ERROR (_("expected probe point specifier"));
2039 if (aliases
.empty())
2041 probe
* p
= new probe
;
2043 p
->locations
= locations
;
2044 p
->body
= parse_stmt_block ();
2045 p
->privileged
= privileged
;
2046 p
->systemtap_v_conditional
= systemtap_v_seen
;
2047 probe_ret
.push_back (p
);
2051 probe_alias
* p
= new probe_alias (aliases
);
2053 p
->epilogue_style
= true;
2055 p
->epilogue_style
= false;
2057 p
->locations
= locations
;
2058 p
->body
= parse_stmt_block ();
2059 p
->privileged
= privileged
;
2060 p
->systemtap_v_conditional
= systemtap_v_seen
;
2061 alias_ret
.push_back (p
);
2067 parser::parse_embeddedcode ()
2069 embeddedcode
* e
= new embeddedcode
;
2070 const token
* t
= next ();
2071 if (t
->type
!= tok_embedded
)
2072 throw PARSE_ERROR (_("expected '%{'"));
2075 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
2076 false /* don't skip tokens for parse resumption */);
2079 e
->code
= t
->content
;
2085 parser::parse_stmt_block ()
2087 block
* pb
= new block
;
2089 const token
* t
= next ();
2090 if (! (t
->type
== tok_operator
&& t
->content
== "{"))
2091 throw PARSE_ERROR (_("expected '{'"));
2098 if (t
&& t
->type
== tok_operator
&& t
->content
== "}")
2103 pb
->statements
.push_back (parse_statement ());
2111 parser::parse_try_block ()
2113 try_block
* pb
= new try_block
;
2115 pb
->tok
= expect_kw_token ("try");
2116 pb
->try_block
= parse_stmt_block();
2117 expect_kw ("catch");
2119 const token
* t
= peek ();
2120 if (t
!= NULL
&& t
->type
== tok_operator
&& t
->content
== "(")
2122 swallow (); // swallow the '('
2125 if (! (t
->type
== tok_identifier
))
2126 throw PARSE_ERROR (_("expected identifier"));
2127 symbol
* sym
= new symbol
;
2129 sym
->name
= t
->content
;
2130 pb
->catch_error_var
= sym
;
2135 pb
->catch_error_var
= 0;
2137 pb
->catch_block
= parse_stmt_block();
2145 parser::parse_statement ()
2148 const token
* t
= peek ();
2149 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2150 return new null_statement (next ());
2151 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
2152 return parse_stmt_block (); // Don't squash semicolons.
2153 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "try")
2154 return parse_try_block (); // Don't squash semicolons.
2155 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
2156 return parse_if_statement (); // Don't squash semicolons.
2157 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "for")
2158 return parse_for_loop (); // Don't squash semicolons.
2159 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "foreach")
2160 return parse_foreach_loop (); // Don't squash semicolons.
2161 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "while")
2162 return parse_while_loop (); // Don't squash semicolons.
2163 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "return")
2164 ret
= parse_return_statement ();
2165 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "delete")
2166 ret
= parse_delete_statement ();
2167 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "break")
2168 ret
= parse_break_statement ();
2169 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "continue")
2170 ret
= parse_continue_statement ();
2171 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "next")
2172 ret
= parse_next_statement ();
2173 else if (t
&& (t
->type
== tok_operator
|| // expressions are flexible
2174 t
->type
== tok_identifier
||
2175 t
->type
== tok_number
||
2176 t
->type
== tok_string
||
2177 t
->type
== tok_embedded
))
2178 ret
= parse_expr_statement ();
2179 // XXX: consider generally accepting tok_embedded here too
2181 throw PARSE_ERROR (_("expected statement"));
2183 // Squash "empty" trailing colons after any "non-block-like" statement.
2185 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2187 swallow (); // Silently eat trailing ; after statement
2194 parser::parse_private (vector
<vardecl
*>& globals
, vector
<probe
*>& probes
,
2195 string
const & fname
, vector
<functiondecl
*>& functions
)
2197 const token
* t
= next ();
2198 if (! (t
->type
== tok_keyword
&& t
->content
== "private"))
2199 throw PARSE_ERROR (_("expected 'private'"));
2202 if (t
->type
== tok_keyword
&& t
->content
== "function")
2205 context
= con_function
;
2206 do_parse_functiondecl(functions
, t
, fname
, true);
2208 else if (t
->type
== tok_keyword
&& t
->content
== "global")
2211 context
= con_global
;
2213 if (! (t
->type
== tok_identifier
))
2214 throw PARSE_ERROR (_("expected identifier"));
2215 do_parse_global(globals
, probes
, fname
, t
, true);
2217 // The `private <identifier>` is an acceptable shorthand
2218 // for `private global <identifier>` per above.
2219 else if (t
->type
== tok_identifier
)
2221 context
= con_global
;
2222 do_parse_global(globals
, probes
, fname
, t
, true);
2225 throw PARSE_ERROR (_("expected 'function' or identifier"));
2229 parser::parse_global (vector
<vardecl
*>& globals
, vector
<probe
*>& probes
,
2230 string
const & fname
)
2232 const token
* t0
= next ();
2233 if (! (t0
->type
== tok_keyword
&& t0
->content
== "global"))
2234 throw PARSE_ERROR (_("expected 'global' or 'private'"));
2236 do_parse_global(globals
, probes
, fname
, 0, false);
2240 parser::do_parse_global (vector
<vardecl
*>& globals
, vector
<probe
*>&,
2241 string
const & fname
, const token
* t0
, bool priv
)
2247 t
= (iter0
&& priv
) ? t0
: next ();
2249 if (! (t
->type
== tok_identifier
))
2250 throw PARSE_ERROR (_("expected identifier"));
2252 string gname
= "__global_" + string(t
->content
);
2253 string pname
= "__private_" + detox_path(fname
) + string(t
->content
);
2254 string name
= priv
? pname
: gname
;
2256 for (unsigned i
=0; i
<globals
.size(); i
++)
2258 if (globals
[i
]->name
== name
)
2259 throw PARSE_ERROR (_("duplicate global name"));
2260 if ((globals
[i
]->name
== gname
) || (globals
[i
]->name
== pname
))
2261 throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2264 vardecl
* d
= new vardecl
;
2265 d
->unmangled_name
= t
->content
;
2268 d
->systemtap_v_conditional
= systemtap_v_seen
;
2269 globals
.push_back (d
);
2273 if(t
&& t
->type
== tok_operator
&& t
->content
== "%") //wrapping
2280 if (t
&& t
->type
== tok_operator
&& t
->content
== "[") // array size
2284 expect_number(size
);
2285 if (size
<= 0 || size
> 1000000) // arbitrary max
2286 throw PARSE_ERROR(_("array size out of range"));
2287 d
->maxsize
= (int)size
;
2288 expect_known(tok_operator
, "]");
2292 if (t
&& t
->type
== tok_operator
&& t
->content
== "=") // initialization
2294 if (!d
->compatible_arity(0))
2295 throw PARSE_ERROR(_("only scalar globals can be initialized"));
2297 next (); // Don't swallow, set_arity() used the peeked token.
2298 d
->init
= parse_literal ();
2299 d
->type
= d
->init
->type
;
2303 if (t
&& t
->type
== tok_operator
&& t
->content
== ";") // termination
2309 if (t
&& t
->type
== tok_operator
&& t
->content
== ",") // next global
2320 parser::parse_functiondecl (vector
<functiondecl
*>& functions
,
2321 string
const & fname
)
2323 const token
* t
= next ();
2324 if (! (t
->type
== tok_keyword
&& t
->content
== "function"))
2325 throw PARSE_ERROR (_("expected 'function'"));
2327 do_parse_functiondecl(functions
, t
, fname
, false);
2331 parser::do_parse_functiondecl (vector
<functiondecl
*>& functions
, const token
* t
,
2332 string
const & fname
, bool priv
)
2335 if (! (t
->type
== tok_identifier
)
2336 && ! (t
->type
== tok_keyword
2337 && (t
->content
== "string" || t
->content
== "long")))
2338 throw PARSE_ERROR (_("expected identifier"));
2340 string gname
= "__global_" + string(t
->content
);
2341 string pname
= "__private_" + detox_path(fname
) + string(t
->content
);
2342 string name
= priv
? pname
: gname
;
2343 name
+= "__overload_" + lex_cast(session
.overload_count
[t
->content
]++);
2345 functiondecl
*fd
= new functiondecl ();
2346 fd
->unmangled_name
= t
->content
;
2351 if (t
->type
== tok_operator
&& t
->content
== ":")
2355 if (t
->type
== tok_keyword
&& t
->content
== "string")
2356 fd
->type
= pe_string
;
2357 else if (t
->type
== tok_keyword
&& t
->content
== "long")
2359 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2365 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2366 throw PARSE_ERROR (_("expected '('"));
2373 // permit zero-argument functions
2374 if (t
->type
== tok_operator
&& t
->content
== ")")
2379 else if (! (t
->type
== tok_identifier
))
2380 throw PARSE_ERROR (_("expected identifier"));
2381 vardecl
* vd
= new vardecl
;
2382 vd
->unmangled_name
= vd
->name
= t
->content
;
2384 fd
->formal_args
.push_back (vd
);
2385 fd
->systemtap_v_conditional
= systemtap_v_seen
;
2388 if (t
->type
== tok_operator
&& t
->content
== ":")
2392 if (t
->type
== tok_keyword
&& t
->content
== "string")
2393 vd
->type
= pe_string
;
2394 else if (t
->type
== tok_keyword
&& t
->content
== "long")
2396 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
2400 if (t
->type
== tok_operator
&& t
->content
== ")")
2405 if (t
->type
== tok_operator
&& t
->content
== ",")
2411 throw PARSE_ERROR (_("expected ',' or ')'"));
2415 if (t
->type
== tok_operator
&& t
->content
== ":")
2419 expect_number(priority
);
2420 fd
->priority
= priority
;
2421 // reserve priority 0 for user script implementation
2423 throw PARSE_ERROR (_("specified priority must be > 0"));
2427 // allow script file implementation override automatically when
2428 // priority not specified
2433 if (t
&& t
->type
== tok_embedded
)
2434 fd
->body
= parse_embeddedcode ();
2436 fd
->body
= parse_stmt_block ();
2438 functions
.push_back (fd
);
2441 vector
<probe_point
*>
2442 parser::parse_probe_points()
2444 vector
<probe_point
*> pps
;
2447 vector
<probe_point
*> tail
= parse_components();
2448 pps
.insert(pps
.end(), tail
.begin(), tail
.end());
2450 const token
* t
= peek();
2451 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
2457 if (t
&& t
->type
== tok_operator
2458 && (t
->content
== "{" || t
->content
== "=" ||
2459 t
->content
== "+="|| t
->content
== "}"))
2462 throw PARSE_ERROR (_("expected one of ', { } = +='"));
2467 vector
<probe_point
*>
2468 parser::parse_components()
2470 vector
<probe_point
*> pps
;
2473 vector
<probe_point
*> suffix
= parse_component();
2475 // Cartesian product of components
2480 assert(!suffix
.empty());
2481 vector
<probe_point
*> product
;
2482 for (unsigned i
= 0; i
< pps
.size(); i
++)
2484 if (pps
[i
]->optional
|| pps
[i
]->sufficient
|| pps
[i
]->condition
)
2485 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2486 pps
[i
]->components
[0]->tok
);
2487 for (unsigned j
= 0; j
< suffix
.size(); j
++)
2489 probe_point
* pp
= new probe_point
;
2490 pp
->components
.insert(pp
->components
.end(),
2491 pps
[i
]->components
.begin(), pps
[i
]->components
.end());
2492 pp
->components
.insert(pp
->components
.end(),
2493 suffix
[j
]->components
.begin(), suffix
[j
]->components
.end());
2494 pp
->optional
= suffix
[j
]->optional
;
2495 pp
->sufficient
= suffix
[j
]->sufficient
;
2496 pp
->condition
= suffix
[j
]->condition
;
2497 product
.push_back(pp
);
2500 for (unsigned i
= 0; i
< pps
.size(); i
++) delete pps
[i
];
2501 for (unsigned i
= 0; i
< suffix
.size(); i
++) delete suffix
[i
];
2505 const token
* t
= peek();
2506 if (t
&& t
->type
== tok_operator
&& t
->content
== ".")
2512 // We only fall through here at the end of a probe point (past
2513 // all the dotted/parametrized components).
2515 if (t
&& t
->type
== tok_operator
&&
2516 (t
->content
== "?" || t
->content
== "!"))
2518 for (unsigned i
= 0; i
< pps
.size(); i
++)
2520 if (pps
[i
]->optional
|| pps
[i
]->sufficient
)
2521 throw PARSE_ERROR (_("'?' or '!' respecified"));
2522 pps
[i
]->optional
= true;
2523 if (t
->content
== "!") pps
[i
]->sufficient
= true;
2525 // NB: sufficient implies optional
2531 if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
2535 if (!(t
&& t
->type
== tok_operator
&& t
->content
== "("))
2536 throw PARSE_ERROR (_("expected '('"));
2539 expression
* e
= parse_expression();
2540 for (unsigned i
= 0; i
< pps
.size(); i
++)
2542 if (pps
[i
]->condition
!= 0)
2543 throw PARSE_ERROR (_("condition respecified"));
2544 pps
[i
]->condition
= e
;
2548 if (!(t
&& t
->type
== tok_operator
&& t
->content
== ")"))
2549 throw PARSE_ERROR (_("expected ')'"));
// Parse one component of a probe point and return the resulting
// probe_point list.  The leading token must be an identifier, a
// keyword, "*" or "{".  A "{" hands off to parse_probe_points() and
// then requires a matching "}".  Otherwise adjacent identifier /
// keyword / "*" tokens are glued into one functor string, a
// probe_point holding a single component is built, and an optional
// "(" literal ")" argument is parsed with parse_literal().
2558 vector
<probe_point
*>
2559 parser::parse_component()
2561 const token
* t
= next ();
2562 if (! (t
->type
== tok_identifier
2563 // we must allow ".return" and ".function", which are keywords
2564 || t
->type
== tok_keyword
2565 // we must allow "*", due to being an operator
2566 || (t
->type
== tok_operator
&& (t
->content
== "*" || t
->content
== "{"))))
2567 throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
// Brace group: a nested list of probe points closed by "}".
2569 if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
2572 vector
<probe_point
*> pps
= parse_probe_points();
2574 if (!(t
&& t
->type
== tok_operator
&& t
->content
== "}"))
2575 throw PARSE_ERROR (_("expected '}'"));
2581 // loop which reconstitutes an identifier with wildcards
2582 string content
= t
->content
;
2583 bool changed_p
= false;
2586 const token
* u
= peek();
2589 // ensure pieces of the identifier are adjacent:
2590 if (input
.ate_whitespace
)
2592 // ensure pieces of the identifier are valid:
2593 if (! (u
->type
== tok_identifier
2594 // we must allow arbitrary keywords with a wildcard
2595 || u
->type
== tok_keyword
2596 // we must allow "*", due to being an operator
2597 || (u
->type
== tok_operator
&& u
->content
== "*")))
2601 content
= content
+ (string
)u
->content
;
2610 // We've already swallowed the first token and we're not
2611 // putting it back; no one else has a copy; so we can
2612 // safely overwrite its content and reuse it.
2613 const_cast<token
*>(t
)->content
= content
;
// Wrap the (possibly reconstituted) name in a fresh component and
// a fresh probe_point that owns it.
2616 probe_point::component
* c
= new probe_point::component
;
2617 c
->functor
= t
->content
;
2619 vector
<probe_point
*> pps
;
2620 probe_point
* pp
= new probe_point
;
2621 pp
->components
.push_back(c
);
2623 // NB we may add c->arg soon
2627 // consume optional parameter
2628 if (t
&& t
->type
== tok_operator
&& t
->content
== "(")
2630 swallow (); // consume "("
2631 c
->arg
= parse_literal ();
2634 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2635 throw PARSE_ERROR (_("expected ')'"));
// Build a literal_string from the already-consumed string token T,
// then keep absorbing directly following tok_string tokens by
// appending their content to an accumulated string.  For scripts
// targeting versions before 2.0, concatenation stops when a comment
// was eaten before the next token (input.ate_comment).
2643 parser::consume_string_literals(const token
*t
)
2645 literal_string
*ls
= new literal_string (t
->content
);
2647 // PR11208: check if the next token is also a string literal;
2648 // auto-concatenate it. This is complicated to the extent that we
2649 // need to skip intermediate whitespace.
2651 // NB for versions prior to 2.0: but don't skip over intervening comments
2653 bool p_concat
= false;
2654 const token
*n
= peek();
2655 while (n
!= NULL
&& n
->type
== tok_string
2656 && ! (!input
.has_version("2.0") && input
.ate_comment
))
// The accumulator is seeded from the first token's content ...
2660 concat
= t
->content
;
// ... and each following string token is appended, then consumed.
2663 concat
.append(n
->content
.data(), n
->content
.size());
2664 next(); // consume the token
// Takes the next token: a tok_string is handed to
// consume_string_literals() (which also merges adjacent string
// literals); any other token type raises "expected literal string".
2673 // Parse a string literal and perform backslash escaping on the contents:
2675 parser::parse_literal_string ()
2677 const token
* t
= next ();
2679 if (t
->type
== tok_string
)
2680 l
= consume_string_literals (t
);
2682 throw PARSE_ERROR (_("expected literal string"));
// Parse a literal: either a string (via consume_string_literals) or
// a number with an optional leading "-" operator.  Numbers are
// converted with strtoull() using base 0, so the C prefix rules pick
// the radix, and are range-checked before a literal_number is built.
// Anything else raises "expected literal string or number".
2690 parser::parse_literal ()
2692 const token
* t
= next ();
2694 if (t
->type
== tok_string
)
2696 l
= consume_string_literals (t
);
// Optional sign in front of a numeric literal.
2701 if (t
->type
== tok_operator
&& t
->content
== "-")
2708 if (t
->type
== tok_number
)
2710 const string
& s
= t
->content
;
2711 const char* startp
= s
.c_str ();
2712 char* endp
= (char*) startp
;
2714 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2715 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
2716 // since the lexer only gives us positive digit strings, but we'll
2717 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
2719 long long value
= (long long) strtoull (startp
, & endp
, 0);
// Reject conversion errors, trailing junk (*endp != '\0'), and a
// negated magnitude above 2^63.
// NOTE(review): `neg` is assigned outside the lines visible here,
// presumably where the "-" token is recognized -- confirm.  The
// errno test is only meaningful if errno was cleared before the
// strtoull() call -- confirm that reset is present.
2720 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
2721 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
2722 || (unsigned long long) value
> 18446744073709551615ULL
2723 || value
< -9223372036854775807LL-1)
2724 throw PARSE_ERROR (_("number invalid or out of range"));
2729 l
= new literal_number (value
);
2732 throw PARSE_ERROR (_("expected literal string or number"));
// Parse `if ( expr ) statement [else statement]` into an
// if_statement.  The "if" keyword and both parentheses are required;
// when no "else" follows, elseblock is set to 0.
2741 parser::parse_if_statement ()
2743 const token
* t
= next ();
2744 if (! (t
->type
== tok_keyword
&& t
->content
== "if"))
2745 throw PARSE_ERROR (_("expected 'if'"));
2746 if_statement
* s
= new if_statement
;
2750 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2751 throw PARSE_ERROR (_("expected '('"));
2754 s
->condition
= parse_expression ();
2757 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2758 throw PARSE_ERROR (_("expected ')'"));
2761 s
->thenblock
= parse_statement ();
// Optional else branch.
2764 if (t
&& t
->type
== tok_keyword
&& t
->content
== "else")
2767 s
->elseblock
= parse_statement ();
2770 s
->elseblock
= 0; // in case not otherwise initialized
// Parse an expression used as a statement.  The statement's token is
// a fresh copy of the peeked token, because parse_expression() may
// dispose of the original while consuming it.
2777 parser::parse_expr_statement ()
2779 expr_statement
*es
= new expr_statement
;
2780 const token
* t
= peek ();
2782 throw PARSE_ERROR (_("expression statement expected"));
2783 // Copy, we only peeked, parse_expression might swallow.
2784 es
->tok
= new token (*t
);
2785 es
->value
= parse_expression ();
// Parse `return expr` into a return_statement.  Requires the
// "return" keyword and rejects it unless the parser's context is
// con_function.
2791 parser::parse_return_statement ()
2793 const token
* t
= next ();
2794 if (! (t
->type
== tok_keyword
&& t
->content
== "return"))
2795 throw PARSE_ERROR (_("expected 'return'"));
2796 if (context
!= con_function
)
2797 throw PARSE_ERROR (_("found 'return' not in function context"));
2798 return_statement
* s
= new return_statement
;
2800 s
->value
= parse_expression ();
// Parse `delete expr` into a delete_statement; the "delete" keyword
// is mandatory and the operand is a general expression.
2806 parser::parse_delete_statement ()
2808 const token
* t
= next ();
2809 if (! (t
->type
== tok_keyword
&& t
->content
== "delete"))
2810 throw PARSE_ERROR (_("expected 'delete'"));
2811 delete_statement
* s
= new delete_statement
;
2813 s
->value
= parse_expression ();
// Parse the `next` keyword into a next_statement node; any other
// token raises "expected 'next'".
2819 parser::parse_next_statement ()
2821 const token
* t
= next ();
2822 if (! (t
->type
== tok_keyword
&& t
->content
== "next"))
2823 throw PARSE_ERROR (_("expected 'next'"));
2824 next_statement
* s
= new next_statement
;
// Parse the `break` keyword into a break_statement node; any other
// token raises "expected 'break'".
2831 parser::parse_break_statement ()
2833 const token
* t
= next ();
2834 if (! (t
->type
== tok_keyword
&& t
->content
== "break"))
2835 throw PARSE_ERROR (_("expected 'break'"));
2836 break_statement
* s
= new break_statement
;
// Parse the `continue` keyword into a continue_statement node; any
// other token raises "expected 'continue'".
2843 parser::parse_continue_statement ()
2845 const token
* t
= next ();
2846 if (! (t
->type
== tok_keyword
&& t
->content
== "continue"))
2847 throw PARSE_ERROR (_("expected 'continue'"));
2848 continue_statement
* s
= new continue_statement
;
// Parse `for ( [init] ; [cond] ; [incr] ) statement` into a
// for_loop.  Each of the three header slots may be empty: a slot is
// treated as empty when its terminator (";" or ")") is seen
// immediately.  An empty condition gets a literal_number(1) created
// for it.
2855 parser::parse_for_loop ()
2857 const token
* t
= next ();
2858 if (! (t
->type
== tok_keyword
&& t
->content
== "for"))
2859 throw PARSE_ERROR (_("expected 'for'"));
2860 for_loop
* s
= new for_loop
;
2864 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2865 throw PARSE_ERROR (_("expected '('"));
2868 // initializer + ";"
2870 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2877 s
->init
= parse_expr_statement ();
2879 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
2880 throw PARSE_ERROR (_("expected ';'"));
// Condition slot: an immediate ";" means no condition was written.
2886 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
2888 literal_number
* l
= new literal_number(1);
2890 s
->cond
->tok
= next ();
2894 s
->cond
= parse_expression ();
2896 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
2897 throw PARSE_ERROR (_("expected ';'"));
// Increment slot: an immediate ")" means no increment was written.
2903 if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
2910 s
->incr
= parse_expr_statement ();
2912 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2913 throw PARSE_ERROR (_("expected ')'"));
2918 s
->block
= parse_statement ();
// Parse `while ( expr ) statement`.  The result is represented as a
// for_loop node: only cond and block come from the source text.
2925 parser::parse_while_loop ()
2927 const token
* t
= next ();
2928 if (! (t
->type
== tok_keyword
&& t
->content
== "while"))
2929 throw PARSE_ERROR (_("expected 'while'"));
2930 for_loop
* s
= new for_loop
;
2934 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2935 throw PARSE_ERROR (_("expected '('"));
2938 // dummy init and incr fields
2943 s
->cond
= parse_expression ();
2946 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2947 throw PARSE_ERROR (_("expected ')'"));
2951 s
->block
= parse_statement ();
// Parse a foreach loop into a foreach_loop node.  Pieces handled by
// the visible code, in order:
//   - "foreach" "("
//   - an optional `value =` prefix, found by looking ahead one
//     identifier (plus an optional "+"/"-" sort mark)
//   - either a single index identifier or a bracketed, comma
//     separated index list, each optionally followed by "+"/"-"
//   - the "in" keyword and the array via parse_indexable()
//   - an optional "[...]" slice whose entries are expressions or "*"
//     (stored as NULL)
//   - an optional @avg/@min/@max/@count/@sum aggregate, which must
//     be followed by a "+"/"-" sort directive
//   - an optional "+"/"-" sort directive on the array itself
//   - an optional `limit expr`
//   - ")" and the loop body statement.
// Only one sort directive may appear; a second one raises
// "multiple sort directives".
2958 parser::parse_foreach_loop ()
2960 const token
* t
= next ();
2961 if (! (t
->type
== tok_keyword
&& t
->content
== "foreach"))
2962 throw PARSE_ERROR (_("expected 'foreach'"));
2963 foreach_loop
* s
= new foreach_loop
;
2965 s
->sort_direction
= 0;
2966 s
->sort_aggr
= sc_none
;
2971 if (! (t
->type
== tok_operator
&& t
->content
== "("))
2972 throw PARSE_ERROR (_("expected '('"));
// One-identifier lookahead: the identifier is either the value
// variable (when "=" follows) or the first index.
2975 symbol
* lookahead_sym
= NULL
;
2976 int lookahead_sort
= 0;
2979 if (t
&& t
->type
== tok_identifier
)
2982 lookahead_sym
= new symbol
;
2983 lookahead_sym
->tok
= t
;
2984 lookahead_sym
->name
= t
->content
;
2987 if (t
&& t
->type
== tok_operator
&&
2988 (t
->content
== "+" || t
->content
== "-"))
2990 lookahead_sort
= (t
->content
== "+") ? 1 : -1;
// "=" after the lookahead identifier: it names the value variable.
2995 if (t
&& t
->type
== tok_operator
&& t
->content
== "=")
2998 s
->value
= lookahead_sym
;
3001 s
->sort_direction
= lookahead_sort
;
3004 lookahead_sym
= NULL
;
3008 // see also parse_array_in
3010 bool parenthesized
= false;
3012 if (!lookahead_sym
&& t
&& t
->type
== tok_operator
&& t
->content
== "[")
3015 parenthesized
= true;
// Lookahead identifier not claimed as a value: it is the first index.
3020 s
->indexes
.push_back (lookahead_sym
);
3023 s
->sort_direction
= lookahead_sort
;
3026 lookahead_sym
= NULL
;
3031 if (! (t
->type
== tok_identifier
))
3032 throw PARSE_ERROR (_("expected identifier"));
3033 symbol
* sym
= new symbol
;
3035 sym
->name
= t
->content
;
3036 s
->indexes
.push_back (sym
);
3039 if (t
&& t
->type
== tok_operator
&&
3040 (t
->content
== "+" || t
->content
== "-"))
3042 if (s
->sort_direction
)
3043 throw PARSE_ERROR (_("multiple sort directives"));
3044 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
// sort_column is the 1-based position of the index just pushed.
3045 s
->sort_column
= s
->indexes
.size();
3052 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3057 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
3063 throw PARSE_ERROR (_("expected ',' or ']'"));
3066 break; // expecting only one expression
3070 if (! (t
->type
== tok_keyword
&& t
->content
== "in"))
3071 throw PARSE_ERROR (_("expected 'in'"));
3074 s
->base
= parse_indexable();
3076 // check if there was an array slice that was specified
3078 if (t
&& t
->type
== tok_operator
&& t
->content
== "[")
// A "*" slice entry is a wildcard, recorded as a NULL expression.
3084 if (t
&& t
->type
== tok_operator
&& t
->content
== "*")
3087 s
->array_slice
.push_back (NULL
);
3090 s
->array_slice
.push_back (parse_expression());
3093 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3098 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
3104 throw PARSE_ERROR (_("expected ',' or ']'"));
3109 // check for atword, see also expect_ident_or_atword,
3111 if (t
&& t
->type
== tok_operator
&& t
->content
[0] == '@')
3113 if (t
->content
== "@avg") s
->sort_aggr
= sc_average
;
3114 else if (t
->content
== "@min") s
->sort_aggr
= sc_min
;
3115 else if (t
->content
== "@max") s
->sort_aggr
= sc_max
;
3116 else if (t
->content
== "@count") s
->sort_aggr
= sc_count
;
3117 else if (t
->content
== "@sum") s
->sort_aggr
= sc_sum
;
3118 else throw PARSE_ERROR(_("expected statistical operation"));
// An aggregate is only meaningful with a sort direction.
3122 if (! (t
&& t
->type
== tok_operator
&& (t
->content
== "+" || t
->content
== "-")))
3123 throw PARSE_ERROR(_("expected sort directive"));
3127 if (t
&& t
->type
== tok_operator
&&
3128 (t
->content
== "+" || t
->content
== "-"))
3130 if (s
->sort_direction
)
3131 throw PARSE_ERROR (_("multiple sort directives"));
3132 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
3138 if (tok_is(t
, tok_keyword
, "limit"))
3140 swallow (); // get past the "limit"
3141 s
->limit
= parse_expression ();
// NOTE(review): unlike the other messages in this function, the one
// below is not wrapped in _() for translation.
3145 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
3146 throw PARSE_ERROR ("expected ')'");
3149 s
->block
= parse_statement ();
// Entry point of the expression grammar: delegates to
// parse_assignment().
3155 parser::parse_expression ()
3157 return parse_assignment ();
// Parse an assignment-level expression.  The left side comes from
// parse_ternary(); if one of the listed assignment operators follows
// ("=", "<<<", "+=", "-=", ...), an assignment node is built whose
// right side is parsed by a recursive parse_expression() call.
3162 parser::parse_assignment ()
3164 expression
* op1
= parse_ternary ();
3166 const token
* t
= peek ();
3167 // right-associative operators
3168 if (t
&& t
->type
== tok_operator
3169 && (t
->content
== "=" ||
3170 t
->content
== "<<<" ||
3171 t
->content
== "+=" ||
3172 t
->content
== "-=" ||
3173 t
->content
== "*=" ||
3174 t
->content
== "/=" ||
3175 t
->content
== "%=" ||
3176 t
->content
== "<<=" ||
3177 t
->content
== ">>=" ||
3178 t
->content
== "&=" ||
3179 t
->content
== "^=" ||
3180 t
->content
== "|=" ||
3181 t
->content
== ".=" ||
3184 // NB: lvalueness is checked during elaboration / translation
3185 assignment
* e
= new assignment
;
3190 e
->right
= parse_expression ();
// Parse `cond ? a : b`.  The condition comes from parse_logical_or();
// when "?" follows, both branches are parsed with parse_expression()
// and a ":" between them is mandatory.
3199 parser::parse_ternary ()
3201 expression
* op1
= parse_logical_or ();
3203 const token
* t
= peek ();
3204 if (t
&& t
->type
== tok_operator
&& t
->content
== "?")
3206 ternary_expression
* e
= new ternary_expression
;
3210 e
->truevalue
= parse_expression (); // XXX
3213 if (! (t
->type
== tok_operator
&& t
->content
== ":"))
3214 throw PARSE_ERROR (_("expected ':'"));
3217 e
->falsevalue
= parse_expression (); // XXX
// Parse a chain of "||" operators.  Operands come from
// parse_logical_and(); the loop builds a logical_or_expr for each
// "||" encountered.
3226 parser::parse_logical_or ()
3228 expression
* op1
= parse_logical_and ();
3230 const token
* t
= peek ();
3231 while (t
&& t
->type
== tok_operator
&& t
->content
== "||")
3233 logical_or_expr
* e
= new logical_or_expr
;
3238 e
->right
= parse_logical_and ();
// Parse a chain of "&&" operators.  Operands come from
// parse_boolean_or(); the loop builds a logical_and_expr for each
// "&&" encountered.
3248 parser::parse_logical_and ()
3250 expression
* op1
= parse_boolean_or ();
3252 const token
* t
= peek ();
3253 while (t
&& t
->type
== tok_operator
&& t
->content
== "&&")
3255 logical_and_expr
*e
= new logical_and_expr
;
3260 e
->right
= parse_boolean_or ();
// Parse a chain of bitwise "|" operators.  Operands come from
// parse_boolean_xor(); each "|" yields a binary_expression.
3270 parser::parse_boolean_or ()
3272 expression
* op1
= parse_boolean_xor ();
3274 const token
* t
= peek ();
3275 while (t
&& t
->type
== tok_operator
&& t
->content
== "|")
3277 binary_expression
* e
= new binary_expression
;
3282 e
->right
= parse_boolean_xor ();
// Parse a chain of bitwise "^" operators.  Operands come from
// parse_boolean_and(); each "^" yields a binary_expression.
3292 parser::parse_boolean_xor ()
3294 expression
* op1
= parse_boolean_and ();
3296 const token
* t
= peek ();
3297 while (t
&& t
->type
== tok_operator
&& t
->content
== "^")
3299 binary_expression
* e
= new binary_expression
;
3304 e
->right
= parse_boolean_and ();
// Parse a chain of bitwise "&" operators.  Operands come from
// parse_array_in(); each "&" yields a binary_expression.
3314 parser::parse_boolean_and ()
3316 expression
* op1
= parse_array_in ();
3318 const token
* t
= peek ();
3319 while (t
&& t
->type
== tok_operator
&& t
->content
== "&")
3321 binary_expression
* e
= new binary_expression
;
3326 e
->right
= parse_array_in ();
// Parse an optional array-membership test `index(es) in array`.
// The index part is either one expression or a "[" ... "]" list of
// comma separated expressions, where "*" inside brackets is a
// wildcard stored as NULL.  If the "in" keyword follows, an array_in
// node wrapping an arrayindex (indexes + parse_indexable() base) is
// built.  Without "in", exactly one index expression is acceptable;
// a longer list raises "unexpected comma-separated expression list".
3336 parser::parse_array_in ()
3338 // This is a very tricky case. All these are legit expressions:
3339 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
3340 vector
<expression
*> indexes
;
3341 bool parenthesized
= false;
3343 const token
* t
= peek ();
3344 if (t
&& t
->type
== tok_operator
&& t
->content
== "[")
3347 parenthesized
= true;
// Wildcard index, only recognized inside a bracketed list.
3353 if (t
&& t
->type
== tok_operator
&& t
->content
== "*" && parenthesized
)
3356 indexes
.push_back(NULL
);
3360 expression
* op1
= parse_comparison_or_regex_query ();
3361 indexes
.push_back (op1
);
3366 const token
* t
= peek ();
3367 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
3372 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
3378 throw PARSE_ERROR (_("expected ',' or ']'"));
3381 break; // expecting only one expression
3385 if (t
&& t
->type
== tok_keyword
&& t
->content
== "in")
3387 array_in
*e
= new array_in
;
3391 arrayindex
* a
= new arrayindex
;
3392 a
->indexes
= indexes
;
3393 a
->base
= parse_indexable();
// The arrayindex reuses the token of its base for error reporting.
3394 a
->tok
= a
->base
->tok
;
3398 else if (indexes
.size() == 1) // no "in" - need one expression only
3401 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
// Parse either a regex match (`lhs =~ "re"` / `lhs !~ "re"`, whose
// right side must be a literal string) or a chain of comparison
// operators (>, <, ==, !=, <=, >=).  Operands come from
// parse_shift().
3406 parser::parse_comparison_or_regex_query ()
3408 expression
* op1
= parse_shift ();
3410 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
3411 const token
*t
= peek();
3412 if (t
&& t
->type
== tok_operator
3413 && (t
->content
== "=~" ||
3414 t
->content
== "!~"))
3416 regex_query
* r
= new regex_query
;
3421 r
->right
= parse_literal_string();
3425 else while (t
&& t
->type
== tok_operator
3426 && (t
->content
== ">" ||
3427 t
->content
== "<" ||
3428 t
->content
== "==" ||
3429 t
->content
== "!=" ||
3430 t
->content
== "<=" ||
3431 t
->content
== ">="))
3433 comparison
* e
= new comparison
;
3438 e
->right
= parse_shift ();
// Parse a chain of "<<" / ">>" operators.  Operands come from
// parse_concatenation(); each operator yields a binary_expression.
3448 parser::parse_shift ()
3450 expression
* op1
= parse_concatenation ();
3452 const token
* t
= peek ();
3453 while (t
&& t
->type
== tok_operator
&&
3454 (t
->content
== "<<" || t
->content
== ">>"))
3456 binary_expression
* e
= new binary_expression
;
3461 e
->right
= parse_concatenation ();
// Parse a chain of "." string-concatenation operators.  Operands
// come from parse_additive(); each "." yields a concatenation node.
3471 parser::parse_concatenation ()
3473 expression
* op1
= parse_additive ();
3475 const token
* t
= peek ();
3476 // XXX: the actual awk string-concatenation operator is *whitespace*.
3477 // I don't know how to easily to model that here.
3478 while (t
&& t
->type
== tok_operator
&& t
->content
== ".")
3480 concatenation
* e
= new concatenation
;
3485 e
->right
= parse_additive ();
// Parse a chain of binary "+" / "-" operators.  Operands come from
// parse_multiplicative(); each operator yields a binary_expression.
3495 parser::parse_additive ()
3497 expression
* op1
= parse_multiplicative ();
3499 const token
* t
= peek ();
3500 while (t
&& t
->type
== tok_operator
3501 && (t
->content
== "+" || t
->content
== "-"))
3503 binary_expression
* e
= new binary_expression
;
3508 e
->right
= parse_multiplicative ();
// Parse a chain of "*", "/" and "%" operators.  Operands come from
// parse_unary(); each operator yields a binary_expression.
3518 parser::parse_multiplicative ()
3520 expression
* op1
= parse_unary ();
3522 const token
* t
= peek ();
3523 while (t
&& t
->type
== tok_operator
3524 && (t
->content
== "*" || t
->content
== "/" || t
->content
== "%"))
3526 binary_expression
* e
= new binary_expression
;
3531 e
->right
= parse_unary ();
// Parse prefix unary operators ("+", "-", "!", "~", ...).  When one
// is present a unary_expression is built and its operand is parsed
// by recursing into parse_unary(), so prefixes may stack.  Without a
// prefix operator, parsing continues in parse_crement().
3541 parser::parse_unary ()
3543 const token
* t
= peek ();
3544 if (t
&& t
->type
== tok_operator
3545 && (t
->content
== "+" ||
3546 t
->content
== "-" ||
3547 t
->content
== "!" ||
3548 t
->content
== "~" ||
3551 unary_expression
* e
= new unary_expression
;
3555 e
->operand
= parse_unary ();
3559 return parse_crement ();
// Parse prefix and postfix "++" / "--".  A leading operator yields
// a pre_crement whose operand comes from parse_dwarf_value().
// Otherwise the operand is parsed first and a trailing "++" / "--"
// yields a post_crement.
3564 parser::parse_crement () // as in "increment" / "decrement"
3566 // NB: Ideally, we'd parse only a symbol as an operand to the
3567 // *crement operators, instead of a general expression value. We'd
3568 // need more complex lookahead code to tell apart the postfix cases.
3569 // So we just punt, and leave it to pass-3 to signal errors on
3570 // cases like "4++".
3572 const token
* t
= peek ();
3573 if (t
&& t
->type
== tok_operator
3574 && (t
->content
== "++" || t
->content
== "--"))
3576 pre_crement
* e
= new pre_crement
;
3580 e
->operand
= parse_dwarf_value ();
3584 // post-crement or non-crement
3585 expression
*op1
= parse_dwarf_value ();
3588 if (t
&& t
->type
== tok_operator
3589 && (t
->content
== "++" || t
->content
== "--"))
3591 post_crement
* e
= new post_crement
;
// Parse a value that may be a target symbol.  An optional leading
// "&" requests address-of.  $var, @cast and @var are parsed by their
// dedicated routines; anything else is parsed by parse_value().
// From compatibility version 2.6 on, a plain value that has a "&"
// prefix or is followed by target-symbol components is wrapped in an
// autocast_op.  Before 2.6, "&" is only accepted on $var / @cast /
// @var.  When a target symbol results, its addressof flag is set and
// the trailing components are parsed.
3604 parser::parse_dwarf_value ()
3606 expression
* expr
= NULL
;
3607 target_symbol
* tsym
= NULL
;
3609 // With '&' we'll definitely be making a target symbol of some sort
3610 const token
* addrtok
= peek_op ("&") ? next () : NULL
;
3611 bool addressof
= (addrtok
!= NULL
);
3613 // First try target_symbol types: $var, @cast, and @var.
3614 const token
* t
= peek ();
3615 if (t
&& t
->type
== tok_identifier
&& t
->content
[0] == '$')
3616 expr
= tsym
= parse_target_symbol ();
3617 else if (tok_is (t
, tok_operator
, "@cast"))
3618 expr
= tsym
= parse_cast_op ();
3619 else if (tok_is (t
, tok_operator
, "@var"))
3620 expr
= tsym
= parse_atvar_op ();
3621 else if (addressof
&& !input
.has_version("2.6"))
3622 // '&' on old version only allowed specific target_symbol types
3623 throw PARSE_ERROR (_("expected @cast, @var or $var"));
3625 // Otherwise just get a plain value of any sort.
3626 expr
= parse_value ();
3628 // If we had '&' or see any target suffixes, that forces a target_symbol.
3629 // For compatibility, we only do this starting with 2.6.
3630 if (!tsym
&& (addressof
|| peek_target_symbol_components ())
3631 && input
.has_version("2.6"))
3633 autocast_op
*cop
= new autocast_op
;
// GNU "?:" extension: use addrtok when non-null, else the peeked token.
3634 cop
->tok
= addrtok
?: peek ();
3635 cop
->operand
= expr
;
3641 // Parse the rest of any kind of target symbol
3642 tsym
->addressof
= addressof
;
3643 parse_target_symbol_components (tsym
);
// Parse a primary value, chosen by the peeked token:
//   - tok_embedded: an embedded_expr carrying the token content as
//     code (an error is raised for unprivileged scripts)
//   - "(": a parenthesized expression, which must end with ")"
//   - identifier or "@..." operator: parse_symbol()
//   - anything else: parse_literal()
3651 parser::parse_value ()
3653 const token
* t
= peek ();
3655 throw PARSE_ERROR (_("expected value"));
3657 if (t
->type
== tok_embedded
)
// NOTE(review): the condition guarding this throw is not among the
// lines visible here; by the message it is a privilege check.
3660 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
3662 embedded_expr
*e
= new embedded_expr
;
3664 e
->code
= t
->content
;
3669 if (t
->type
== tok_operator
&& t
->content
== "(")
3672 expression
* e
= parse_expression ();
3674 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
3675 throw PARSE_ERROR (_("expected ')'"));
3679 else if (t
->type
== tok_identifier
3680 || (t
->type
== tok_operator
&& t
->content
[0] == '@'))
3681 return parse_symbol ();
3683 return parse_literal ();
// Read an identifier or @-word into NAME.  If it is @hist_linear or
// @hist_log, fill in HOP: set the histogram type, parse the
// statistic expression and, for the linear form, read three numeric
// parameters with expect_number() into hop->params.
3688 parser::parse_hist_op_or_bare_name (hist_op
*&hop
, interned_string
&name
)
3691 const token
* t
= expect_ident_or_atword (name
);
3692 if (name
== "@hist_linear" || name
== "@hist_log")
3695 if (name
== "@hist_linear")
3696 hop
->htype
= hist_linear
;
3697 else if (name
== "@hist_log")
3698 hop
->htype
= hist_log
;
3701 hop
->stat
= parse_expression ();
3703 if (hop
->htype
== hist_linear
)
// @hist_linear takes exactly three numeric parameters.
3705 for (size_t i
= 0; i
< 3; ++i
)
3708 expect_number (tnum
);
3709 hop
->params
.push_back (tnum
);
// Parse something that can be indexed: a histogram operator or a
// bare name, as recognized by parse_hist_op_or_bare_name().  A
// symbol node is allocated for the bare-name case.
// NOTE(review): the branch selecting between the two results is not
// among the lines visible here.
3719 parser::parse_indexable ()
3721 hist_op
*hop
= NULL
;
3722 interned_string name
;
3723 const token
*tok
= parse_hist_op_or_bare_name(hop
, name
);
3728 symbol
* sym
= new symbol
;
// Parse anything that starts with a name or an @-word.  After
// parse_hist_op_or_bare_name(), the visible dispatch is:
//   - @defined / @entry / @perf: dedicated parse_*_op() routines
//   - other @-words: a stat_op (@count, @sum, @min, @max, plus the
//     sc_average case), else "unknown operator"
//   - names accepted by print_format::create(): print-family calls,
//     including the print(@hist_*(...)) special case, a format
//     string or delimiter argument where required, and the
//     remaining arguments
//   - name followed by "(": a functioncall with a comma separated
//     argument list
//   - otherwise a symbol or hist_op, optionally followed by a
//     "[" ... "]" index list in which some entries are stored as
//     NULL.
// A hist_op that ends up without an index is rejected with a parse
// error.
3736 // var, indexable[index], func(parms), printf("...", ...),
3737 // @defined, @entry, @stat_op(stat)
3738 expression
* parser::parse_symbol ()
3740 hist_op
*hop
= NULL
;
3742 interned_string name
;
3743 const token
*t
= parse_hist_op_or_bare_name(hop
, name
);
3747 // If we didn't get a hist_op, then we did get an identifier. We can
3748 // now scrutinize this identifier for the various magic forms of identifier
3749 // (printf, @stat_op...)
3751 // NB: PR11343: @defined() is not incompatible with earlier versions
3752 // of stap, so no need to check session.compatible for 1.2
3753 if (name
== "@defined")
3754 return parse_defined_op (t
);
3756 if (name
== "@entry")
3757 return parse_entry_op (t
);
3759 if (name
== "@perf")
3760 return parse_perf_op (t
);
// Any remaining @-word must be one of the statistics operators.
3762 if (name
.size() > 0 && name
[0] == '@')
3764 stat_op
*sop
= new stat_op
;
3766 sop
->ctype
= sc_average
;
3767 else if (name
== "@count")
3768 sop
->ctype
= sc_count
;
3769 else if (name
== "@sum")
3770 sop
->ctype
= sc_sum
;
3771 else if (name
== "@min")
3772 sop
->ctype
= sc_min
;
3773 else if (name
== "@max")
3774 sop
->ctype
= sc_max
;
3776 throw PARSE_ERROR(_F("unknown operator %s",
3777 name
.to_string().c_str()));
3780 sop
->stat
= parse_expression ();
// print-family functions: print_format::create() returns non-null
// for names it recognizes.
3785 else if (print_format
*fmt
= print_format::create(t
))
3788 if ((name
== "print" || name
== "println" ||
3789 name
== "sprint" || name
== "sprintln") &&
3790 (peek_op("@hist_linear") || peek_op("@hist_log")))
3792 // We have a special case where we recognize
3793 // print(@hist_foo(bar)) as a magic print-the-histogram
3794 // construct. This is sort of gross but it avoids
3795 // promoting histogram references to typeful
3799 t
= parse_hist_op_or_bare_name(hop
, name
);
3802 // It is, sadly, possible that even while parsing a
3803 // hist_op, we *mis-guessed* and the user wishes to
3804 // print(@hist_op(foo)[bucket]), a scalar. In that case
3805 // we must parse the arrayindex and print an expression.
3807 // XXX: This still fails if the arrayindex is part of a
3808 // larger expression. To really handle everything, we'd
3809 // need to push back all the hist tokens start over.
3815 // This is simplified version of the
3816 // multi-array-index parser below, because we can
3817 // only ever have one index on a histogram anyways.
3819 struct arrayindex
* ai
= new arrayindex
;
3822 ai
->indexes
.push_back (parse_expression ());
3824 fmt
->args
.push_back(ai
);
3826 // Consume any subsequent arguments.
3827 while (!peek_op (")"))
3830 expression
*e
= parse_expression ();
3831 fmt
->args
.push_back(e
);
3838 bool consumed_arg
= false;
3839 if (fmt
->print_with_format
)
3841 // Consume and convert a format string. Agreement between the
3842 // format string and the arguments is postponed to the
3843 // typechecking phase.
3844 literal_string
* ls
= parse_literal_string();
3845 fmt
->raw_components
= ls
->value
;
3847 fmt
->components
= print_format::string_to_components (fmt
->raw_components
);
3848 consumed_arg
= true;
3850 else if (fmt
->print_with_delim
)
3852 // Consume a delimiter to separate arguments.
3853 literal_string
* ls
= parse_literal_string();
3854 fmt
->delimiter
= ls
->value
;
3856 consumed_arg
= true;
3857 min_args
= 2; // so that the delim is used at least once
3859 else if (!fmt
->print_with_newline
)
3861 // If we are not printing with a format string, nor with a
3862 // delim, nor with a newline, then it's either print() or
3863 // sprint(), both of which require at least one argument (of
3868 // Consume any subsequent arguments.
3869 while (min_args
|| !peek_op (")"))
3873 expression
*e
= parse_expression ();
3874 fmt
->args
.push_back(e
);
3875 consumed_arg
= true;
3884 else if (peek_op ("(")) // function call
3887 struct functioncall
* f
= new functioncall
;
3890 // Allow empty actual parameter list
3898 f
->args
.push_back (parse_expression ());
3904 else if (peek_op (","))
3910 throw PARSE_ERROR (_("expected ',' or ')'"));
3923 // By now, either we had a hist_op in the first place, or else
3924 // we had a plain word and it was converted to a symbol.
3926 assert (!hop
!= !sym
); // logical XOR
3928 // All that remains is to check for array indexing
3930 if (peek_op ("[")) // array
3933 struct arrayindex
* ai
= new arrayindex
;
// NOTE(review): the NULL entry presumably represents a "*" wildcard
// index, as in parse_array_in -- the guarding test is not visible.
3946 ai
->indexes
.push_back (NULL
);
3949 ai
->indexes
.push_back (parse_expression ());
3955 else if (peek_op (","))
3961 throw PARSE_ERROR (_("expected ',' or ']'"));
3967 // If we got to here, we *should* have a symbol; if we have
3968 // a hist_op on its own, it doesn't count as an expression,
3969 // so we throw a parse error.
3972 throw PARSE_ERROR(_("base histogram operator where expression expected"), t
);
// Parse a `$name` identifier token into a target_symbol whose name
// is the token content.  Any other token raises "expected $var".
3978 target_symbol
* parser::parse_target_symbol ()
3980 const token
* t
= next ();
3981 if (t
->type
== tok_identifier
&& t
->content
[0]=='$')
3983 // target_symbol time
3984 target_symbol
*tsym
= new target_symbol
;
3986 tsym
->name
= t
->content
;
3990 throw PARSE_ERROR (_("expected $var"));
// Parse a `@cast` operator into a cast_op.  The visible arguments
// are an operand expression, a type-name string that must be
// non-empty, and a module string.  Any other leading token raises
// "expected @cast".
// NOTE(review): the punctuation between arguments, and whether the
// module argument is optional, are on lines not visible here.
3995 cast_op
* parser::parse_cast_op ()
3997 const token
* t
= next ();
3998 if (t
->type
== tok_operator
&& t
->content
== "@cast")
4000 cast_op
*cop
= new cast_op
;
4002 cop
->name
= t
->content
;
4004 cop
->operand
= parse_expression ();
4006 expect_unknown(tok_string
, cop
->type_name
);
4007 if (cop
->type_name
.empty())
4008 throw PARSE_ERROR (_("expected non-empty string"));
4012 expect_unknown(tok_string
, cop
->module
);
4018 throw PARSE_ERROR (_("expected @cast"));
// Parse a `@var` operator into an atvar_op.  The target name is a
// string; if it contains '@', everything after the first '@' is
// stored as cu_name.  A further string is read into module.  Any
// other leading token raises "expected @var".
4023 atvar_op
* parser::parse_atvar_op ()
4025 const token
* t
= next ();
4026 if (t
->type
== tok_operator
&& t
->content
== "@var")
4028 atvar_op
*aop
= new atvar_op
;
4030 aop
->name
= t
->content
;
4032 expect_unknown(tok_string
, aop
->target_name
);
// "name@cu" form: the part after the first '@' names the CU.
4033 size_t found_at
= aop
->target_name
.find("@");
4034 if (found_at
!= string::npos
)
4035 aop
->cu_name
= aop
->target_name
.substr(found_at
+ 1);
4041 expect_unknown (tok_string
, aop
->module
);
4049 throw PARSE_ERROR (_("expected @var"));
// Builds a defined_op whose operand is parsed by parse_expression().
4053 // Parse a @defined(). Given head token has already been consumed.
4054 expression
* parser::parse_defined_op (const token
* t
)
4056 defined_op
* dop
= new defined_op
;
4059 dop
->operand
= parse_expression ();
// Builds an entry_op whose operand is parsed by parse_expression().
4065 // Parse a @entry(). Given head token has already been consumed.
4066 expression
* parser::parse_entry_op (const token
* t
)
4068 entry_op
* eop
= new entry_op
;
4071 eop
->operand
= parse_expression ();
// Builds a perf_op whose operand is a literal string; an empty
// string is rejected with "expected non-empty string".
4077 // Parse a @perf(). Given head token has already been consumed.
4078 expression
* parser::parse_perf_op (const token
* t
)
4080 perf_op
* pop
= new perf_op
;
4083 pop
->operand
= parse_literal_string ();
4084 if (pop
->operand
->value
== "")
4085 throw PARSE_ERROR (_("expected non-empty string"));
// Lookahead test, without consuming anything: does the next token
// start a target-symbol suffix?  That is the case for the operators
// "->" and "[", and for an identifier made up solely of '$'
// characters (the pretty-print marker).
4092 parser::peek_target_symbol_components ()
4094 const token
* t
= peek ();
4096 ((t
->type
== tok_operator
&& (t
->content
== "->" || t
->content
== "["))
4097 || (t
->type
== tok_identifier
&&
4098 t
->content
.find_first_not_of('$') == string::npos
));
// Parse the suffix components of target symbol E and append them to
// e->components:
//   - trailing '$' characters on the symbol's own name ($foo$)
//   - member accesses, where the member name may itself carry
//     trailing '$' characters or consist only of them
//   - "[" index "]" accesses; a component is built either from a
//     literal number's value or from the index expression
//   - a separate all-'$' identifier token after the other components
// Once a pretty-print marker has been seen, a following "->" or "["
// is rejected.
4102 parser::parse_target_symbol_components (target_symbol
* e
)
4104 bool pprint
= false;
4106 // check for pretty-print in the form $foo$
4107 string base
= e
->name
;
4108 size_t pprint_pos
= base
.find_last_not_of('$');
// True only when a non-'$' character past position 0 is followed by
// at least one trailing '$'.
4109 if (0 < pprint_pos
&& pprint_pos
< base
.length() - 1)
4111 string pprint_val
= base
.substr(pprint_pos
+ 1);
4112 base
.erase(pprint_pos
+ 1);
4114 e
->components
.push_back (target_symbol::component(e
->tok
, pprint_val
, true));
4122 const token
* t
= next();
4123 interned_string member
;
4124 expect_ident_or_keyword (member
);
4126 // check for pretty-print in the form $foo->$ or $foo->bar$
4127 pprint_pos
= member
.find_last_not_of('$');
4128 interned_string pprint_val
;
// npos means the member is nothing but '$' characters.
4129 if (pprint_pos
== string::npos
|| pprint_pos
< member
.length() - 1)
4131 pprint_val
= member
.substr(pprint_pos
+ 1);
4132 member
= member
.substr(0, pprint_pos
+ 1);
4136 if (!member
.empty())
4137 e
->components
.push_back (target_symbol::component(t
, member
));
4139 e
->components
.push_back (target_symbol::component(t
, pprint_val
, true));
4141 else if (peek_op ("["))
4143 const token
* t
= next();
4144 expression
* index
= parse_expression();
// dynamic_cast yields null unless the index is a literal_number.
4145 literal_number
* ln
= dynamic_cast<literal_number
*>(index
);
4147 e
->components
.push_back (target_symbol::component(t
, ln
->value
));
4149 e
->components
.push_back (target_symbol::component(t
, index
));
4158 // check for pretty-print in the form $foo $
4159 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4160 const token
* t
= peek();
4161 if (t
!= NULL
&& t
->type
== tok_identifier
&&
4162 t
->content
.find_first_not_of('$') == string::npos
)
4165 e
->components
.push_back (target_symbol::component(t
, t
->content
, true));
4170 if (pprint
&& (peek_op ("->") || peek_op("[")))
4171 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
4174 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */