]> sourceware.org Git - systemtap.git/blame - parse.cxx
Fix PR20136 by using the @const() operator across the tapsets.
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
e937a1d2 2// Copyright (C) 2005-2015 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
92585d32 5// Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
69c68955
FCE
6//
7// This file is part of systemtap, and is free software. You can
8// redistribute it and/or modify it under the terms of the GNU General
9// Public License (GPL); either version 2, or (at your option) any
10// later version.
2f1a1aea 11
2b066ec1 12#include "config.h"
2f1a1aea
FCE
13#include "staptree.h"
14#include "parse.h"
177a8ead 15#include "session.h"
3f99432c 16#include "util.h"
aa389a19 17#include "stringtable.h"
3f99432c 18
2b066ec1 19#include <iostream>
eacb10ce 20
2b066ec1 21#include <fstream>
2f1a1aea 22#include <cctype>
9c0c0e46 23#include <cstdlib>
29e64872 24#include <cassert>
9c0c0e46
FCE
25#include <cerrno>
26#include <climits>
57b73400 27#include <sstream>
f74fb737 28#include <cstring>
3f99432c 29#include <cctype>
eacb10ce 30#include <iterator>
5d46f7cb 31#include <unordered_set>
eacb10ce 32
7a468d68
FCE
33extern "C" {
34#include <fnmatch.h>
35}
2f1a1aea
FCE
36
37using namespace std;
38
c18f07f8
JS
39
40class lexer
41{
42public:
fee28e5c 43 bool ate_comment; // current token follows a comment
b5477cd9 44 bool ate_whitespace; // the most recent token followed whitespace
534aad8b 45 bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
f8405ea5 46 bool check_compatible; // whether to gate features on session.compatible
534aad8b 47
b5477cd9 48 token* scan ();
f086fe02 49 lexer (istream&, const string&, systemtap_session&, bool);
c18f07f8 50 void set_current_file (stapfile* f);
101b0805 51 void set_current_token_chain (const token* tok);
f8405ea5 52 inline bool has_version (const char* v) const;
c18f07f8 53
5775f11f 54 unordered_set<interned_string> keywords;
85c97fc2 55 static unordered_set<string> atwords;
c18f07f8
JS
56private:
57 inline int input_get ();
58 inline int input_peek (unsigned n=0);
aa389a19 59 void input_put (const string&, const token*);
c18f07f8 60 string input_name;
47d349b1 61 string input_contents; // NB: being a temporary, no need to interned_string optimize this object
aa389a19 62 const char *input_pointer; // index into input_contents; NB: recompute if input_contents changed!
c18f07f8 63 const char *input_end;
aa389a19
FCE
64 unsigned cursor_suspend_count;
65 unsigned cursor_suspend_line;
66 unsigned cursor_suspend_column;
c18f07f8
JS
67 unsigned cursor_line;
68 unsigned cursor_column;
69 systemtap_session& session;
70 stapfile* current_file;
101b0805 71 const token* current_token_chain;
c18f07f8
JS
72};
73
74
75class parser
76{
77public:
f8405ea5 78 parser (systemtap_session& s, const string& n, istream& i, unsigned flags=0);
c18f07f8
JS
79 ~parser ();
80
f8405ea5
JS
81 stapfile* parse ();
82 probe* parse_synthetic_probe (const token* chain);
83 stapfile* parse_library_macros ();
c18f07f8
JS
84
85private:
86 typedef enum {
87 PP_NONE,
88 PP_KEEP_THEN,
89 PP_SKIP_THEN,
90 PP_KEEP_ELSE,
91 PP_SKIP_ELSE,
92 } pp_state_t;
93
534aad8b
SM
94 struct pp1_activation;
95
fe410f52
SM
96 struct pp_macrodecl : public macrodecl {
97 pp1_activation* parent_act; // used for param bindings
98 virtual bool is_closure() { return parent_act != 0; }
99 pp_macrodecl () : macrodecl(), parent_act(0) { }
534aad8b
SM
100 };
101
c18f07f8
JS
102 systemtap_session& session;
103 string input_name;
c18f07f8 104 lexer input;
f8405ea5 105 bool errs_as_warnings;
c18f07f8 106 bool privileged;
7b5b30a8 107 bool user_file;
c18f07f8
JS
108 parse_context context;
109
534aad8b
SM
110 // preprocessing subordinate, first pass (macros)
111 struct pp1_activation {
112 const token* tok;
113 unsigned cursor; // position within macro body
114 map<string, pp_macrodecl*> params;
534aad8b 115
fe410f52 116 macrodecl* curr_macro;
534aad8b 117
bdf7707b
JS
118 pp1_activation (const token* tok, macrodecl* curr_macro)
119 : tok(tok), cursor(0), curr_macro(curr_macro) { }
534aad8b
SM
120 ~pp1_activation ();
121 };
122
fe410f52 123 map<string, macrodecl*> pp1_namespace;
534aad8b
SM
124 vector<pp1_activation*> pp1_state;
125 const token* next_pp1 ();
ed891cf3 126 const token* scan_pp1 (bool ignore_macros);
534aad8b
SM
127 const token* slurp_pp1_param (vector<const token*>& param);
128 const token* slurp_pp1_body (vector<const token*>& body);
129
130 // preprocessing subordinate, final pass (conditionals)
c18f07f8 131 vector<pair<const token*, pp_state_t> > pp_state;
b5477cd9 132 const token* scan_pp ();
c18f07f8
JS
133 const token* skip_pp ();
134
135 // scanning state
b5477cd9
SM
136 const token* next ();
137 const token* peek ();
c18f07f8 138
731a5359
MW
139 // Advance past and throw away current token after peek () or next ().
140 void swallow ();
141
a07a2c28 142 const token* systemtap_v_seen;
c18f07f8
JS
143 const token* last_t; // the last value returned by peek() or next()
144 const token* next_t; // lookahead token
145
731a5359
MW
146 // expectations, these swallow the token
147 void expect_known (token_type tt, string const & expected);
47d349b1 148 void expect_unknown (token_type tt, interned_string & target);
47d349b1 149 void expect_unknown2 (token_type tt1, token_type tt2, interned_string & target);
731a5359
MW
150
151 // convenience forms, these also swallow the token
152 void expect_op (string const & expected);
891b96e6 153 interned_string expect_op_any (initializer_list<const char*> expected);
731a5359
MW
154 void expect_kw (string const & expected);
155 void expect_number (int64_t & expected);
b1f2b0e8 156 void expect_ident_or_keyword (interned_string & target);
731a5359
MW
157
158 // convenience forms, which return true or false, these don't swallow token
c18f07f8
JS
159 bool peek_op (string const & op);
160 bool peek_kw (string const & kw);
161
731a5359
MW
162 // convenience forms, which return the token
163 const token* expect_kw_token (string const & expected);
b1f2b0e8 164 const token* expect_ident_or_atword (interned_string & target);
731a5359 165
7ac01ea0 166 void print_error (const parse_error& pe, bool errs_as_warnings = false);
c18f07f8
JS
167 unsigned num_errors;
168
169private: // nonterminals
170 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
f41e297c
JS
171 void parse_private (vector<vardecl*>&, vector<probe*>&,
172 string const&, vector<functiondecl*>&);
173 void parse_global (vector<vardecl*>&, vector<probe*>&,
174 string const&);
175 void do_parse_global (vector<vardecl*>&, vector<probe*>&,
176 string const&, const token*, bool);
177 void parse_functiondecl (vector<functiondecl*>&, string const&);
178 void do_parse_functiondecl (vector<functiondecl*>&, const token*,
179 string const&, bool);
c18f07f8 180 embeddedcode* parse_embeddedcode ();
380d759b
FL
181 vector<probe_point*> parse_probe_points ();
182 vector<probe_point*> parse_components ();
183 vector<probe_point*> parse_component ();
d24f1ff4
SM
184 literal_string* consume_string_literals (const token*);
185 literal_string* parse_literal_string ();
c18f07f8
JS
186 literal* parse_literal ();
187 block* parse_stmt_block ();
188 try_block* parse_try_block ();
189 statement* parse_statement ();
190 if_statement* parse_if_statement ();
191 for_loop* parse_for_loop ();
192 for_loop* parse_while_loop ();
193 foreach_loop* parse_foreach_loop ();
194 expr_statement* parse_expr_statement ();
195 return_statement* parse_return_statement ();
196 delete_statement* parse_delete_statement ();
197 next_statement* parse_next_statement ();
198 break_statement* parse_break_statement ();
199 continue_statement* parse_continue_statement ();
200 indexable* parse_indexable ();
b1f2b0e8 201 const token *parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name);
0fb0cac9
JS
202 target_symbol *parse_target_symbol ();
203 cast_op *parse_cast_op ();
204 atvar_op *parse_atvar_op ();
8cc799a5 205 expression* parse_entry_op (const token* t);
c18f07f8 206 expression* parse_defined_op (const token* t);
0a7eb12d 207 expression* parse_const_op (const token* t);
3689db05 208 expression* parse_perf_op (const token* t);
c18f07f8
JS
209 expression* parse_expression ();
210 expression* parse_assignment ();
211 expression* parse_ternary ();
212 expression* parse_logical_or ();
213 expression* parse_logical_and ();
214 expression* parse_boolean_or ();
215 expression* parse_boolean_xor ();
216 expression* parse_boolean_and ();
217 expression* parse_array_in ();
93daaca8 218 expression* parse_comparison_or_regex_query ();
c18f07f8
JS
219 expression* parse_shift ();
220 expression* parse_concatenation ();
221 expression* parse_additive ();
222 expression* parse_multiplicative ();
223 expression* parse_unary ();
224 expression* parse_crement ();
0fb0cac9 225 expression* parse_dwarf_value ();
c18f07f8
JS
226 expression* parse_value ();
227 expression* parse_symbol ();
228
0fb0cac9 229 bool peek_target_symbol_components ();
c18f07f8
JS
230 void parse_target_symbol_components (target_symbol* e);
231};
232
233
2f1a1aea
FCE
234// ------------------------------------------------------------------------
235
c18f07f8 236stapfile*
ba48c27a 237parse (systemtap_session& s, const string& n, istream& i, unsigned flags)
c18f07f8 238{
ba48c27a 239 parser p (s, n, i, flags);
f8405ea5 240 return p.parse ();
c18f07f8
JS
241}
242
c18f07f8 243stapfile*
f8405ea5 244parse (systemtap_session& s, const string& name, unsigned flags)
c18f07f8 245{
4cd32d8c
JS
246 ifstream i(name.c_str(), ios::in);
247 if (i.fail())
248 {
249 cerr << (file_exists(name)
250 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
251 : _F("Input file '%s' is missing.", name.c_str()))
252 << endl;
253 return 0;
254 }
255
f8405ea5
JS
256 parser p (s, name, i, flags);
257 return p.parse ();
c18f07f8
JS
258}
259
fe410f52 260stapfile*
f8405ea5 261parse_library_macros (systemtap_session& s, const string& name)
fe410f52
SM
262{
263 ifstream i(name.c_str(), ios::in);
264 if (i.fail())
265 {
266 cerr << (file_exists(name)
267 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
268 : _F("Input file '%s' is missing.", name.c_str()))
269 << endl;
270 return 0;
271 }
272
f8405ea5
JS
273 parser p (s, name, i);
274 return p.parse_library_macros ();
fe410f52
SM
275}
276
101b0805 277probe*
aa389a19 278parse_synthetic_probe (systemtap_session &s, istream& i, const token* tok)
101b0805 279{
d026d78c 280 parser p (s, tok ? tok->location.file->name : "<synthetic>", i);
f8405ea5 281 return p.parse_synthetic_probe (tok);
101b0805
JS
282}
283
c18f07f8 284// ------------------------------------------------------------------------
bb2e3076 285
f8405ea5 286parser::parser (systemtap_session& s, const string &n, istream& i, unsigned flags):
f086fe02 287 session (s), input_name (n), input (i, input_name, s, !(flags & pf_no_compatible)),
f8405ea5 288 errs_as_warnings(flags & pf_squash_errors), privileged (flags & pf_guru),
7b5b30a8
FL
289 user_file (flags & pf_user_file), context(con_unknown), systemtap_v_seen(0),
290 last_t (0), next_t (0), num_errors (0)
4cd32d8c
JS
291{
292}
2f1a1aea
FCE
293
294parser::~parser()
295{
2f1a1aea
FCE
296}
297
d7f3e0c5
GH
298static string
299tt2str(token_type tt)
300{
301 switch (tt)
302 {
303 case tok_junk: return "junk";
304 case tok_identifier: return "identifier";
305 case tok_operator: return "operator";
306 case tok_string: return "string";
307 case tok_number: return "number";
308 case tok_embedded: return "embedded-code";
6e213f58 309 case tok_keyword: return "keyword";
d7f3e0c5
GH
310 }
311 return "unknown token";
312}
82919855 313
0323ed4d
WC
314ostream&
315operator << (ostream& o, const source_loc& loc)
316{
a704a23b 317 o << loc.file->name << ":"
0323ed4d
WC
318 << loc.line << ":"
319 << loc.column;
320
321 return o;
322}
323
56099f08
FCE
324ostream&
325operator << (ostream& o, const token& t)
326{
d7f3e0c5 327 o << tt2str(t.type);
56099f08 328
6e213f58 329 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 330 {
24cb178f
FCE
331 o << " '";
332 for (unsigned i=0; i<t.content.length(); i++)
333 {
334 char c = t.content[i];
335 o << (isprint (c) ? c : '?');
336 }
337 o << "'";
56099f08 338 }
56099f08 339
dff50e09 340 o << " at "
0323ed4d 341 << t.location;
56099f08
FCE
342
343 return o;
344}
345
346
dff50e09 347void
7ac01ea0 348parser::print_error (const parse_error &pe, bool errs_as_warnings)
2f1a1aea 349{
16fc963f 350 const token *tok = pe.tok ? pe.tok : last_t;
7ac01ea0 351 session.print_error(pe, tok, input_name, errs_as_warnings);
2f1a1aea
FCE
352 num_errors ++;
353}
354
355
2f1a1aea 356
c434ec7e
FCE
357
358template <typename OPERAND>
359bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
360{
361 if (op->type == tok_operator && op->content == "<=")
362 { return lhs <= rhs; }
363 else if (op->type == tok_operator && op->content == ">=")
364 { return lhs >= rhs; }
365 else if (op->type == tok_operator && op->content == "<")
366 { return lhs < rhs; }
367 else if (op->type == tok_operator && op->content == ">")
368 { return lhs > rhs; }
369 else if (op->type == tok_operator && op->content == "==")
370 { return lhs == rhs; }
371 else if (op->type == tok_operator && op->content == "!=")
372 { return lhs != rhs; }
373 else
f0454224 374 throw PARSE_ERROR (_("expected comparison operator"), op);
c434ec7e
FCE
375}
376
377
534aad8b
SM
378// Here, we perform on-the-fly preprocessing in two passes.
379
380// First pass - macro declaration and expansion.
381//
382// The basic form of a declaration is @define SIGNATURE %( BODY %)
383// where SIGNATURE is of the form macro_name (a, b, c, ...)
384// and BODY can obtain the parameter contents as @a, @b, @c, ....
385// Note that parameterless macros can also be declared.
386//
3932c705 387// Macro definitions may not be nested.
534aad8b
SM
388// A macro is available textually after it has been defined.
389//
390// The basic form of a macro invocation
391// for a parameterless macro is @macro_name,
392// for a macro with parameters is @macro_name(param_1, param_2, ...).
393//
26718dbe
SM
// NB: this means that a parameterless macro @foo called as @foo(a, b, c)
// leaves its 'parameters' alone, rather than consuming them to result
// in a "too many parameters" error. This may be useful in the unusual
// case of wanting @foo to expand to the name of a function.
534aad8b
SM
398//
399// Invocations of unknown macros are left unexpanded, to allow
400// the continued use of constructs such as @cast, @var, etc.
401
fe410f52 402macrodecl::~macrodecl ()
534aad8b
SM
403{
404 delete tok;
405 for (vector<const token*>::iterator it = body.begin();
406 it != body.end(); it++)
407 delete *it;
408}
409
410parser::pp1_activation::~pp1_activation ()
411{
412 delete tok;
fe410f52 413 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
534aad8b
SM
414 for (map<string, pp_macrodecl*>::iterator it = params.begin();
415 it != params.end(); it++)
416 delete it->second;
417}
418
419// Grab a token from the current input source (main file or macro body):
420const token*
421parser::next_pp1 ()
422{
423 if (pp1_state.empty())
424 return input.scan ();
425
426 // otherwise, we're inside a macro
427 pp1_activation* act = pp1_state.back();
428 unsigned& cursor = act->cursor;
429 if (cursor < act->curr_macro->body.size())
430 {
431 token* t = new token(*act->curr_macro->body[cursor]);
0002fc51 432 t->chain = new token(*act->tok); // mark chained token
534aad8b
SM
433 cursor++;
434 return t;
435 }
436 else
437 return 0; // reached end of macro body
438}
439
440const token*
ed891cf3 441parser::scan_pp1 (bool ignore_macros = false)
534aad8b
SM
442{
443 while (true)
444 {
445 const token* t = next_pp1 ();
446 if (t == 0) // EOF or end of macro body
447 {
448 if (pp1_state.empty()) // actual EOF
449 return 0;
450
451 // Exit macro and loop around to look for the next token.
452 pp1_activation* act = pp1_state.back();
453 pp1_state.pop_back(); delete act;
454 continue;
455 }
456
457 // macro definition
ed891cf3
MC
458 // PR18462 don't catalog preprocessor-disabled macros
459 if (t->type == tok_operator && t->content == "@define" && !ignore_macros)
534aad8b
SM
460 {
461 if (!pp1_state.empty())
f0454224 462 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
534aad8b
SM
463 delete t;
464
465 // handle macro definition
466 // (1) consume macro signature
3932c705 467 t = input.scan();
534aad8b 468 if (! (t && t->type == tok_identifier))
f0454224 469 throw PARSE_ERROR (_("expected identifier"), t);
47d349b1 470 string name = t->content;
534aad8b
SM
471
472 // check for redefinition of existing macro
473 if (pp1_namespace.find(name) != pp1_namespace.end())
78ab2de3 474 {
f0454224 475 parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
78ab2de3
SM
476
477 // Also point to pp1_namespace[name]->tok, the site of
478 // the original definition:
f0454224 479 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
78ab2de3
SM
480 name.c_str()), pp1_namespace[name]->tok);
481 throw er;
482 }
483
484 // XXX: the above restriction was mostly necessary due to
485 // wanting to leave open the possibility of
486 // statically-scoped semantics in the future.
26718dbe
SM
487
488 // XXX: this cascades into further parse errors as the
489 // parser tries to parse the remaining definition... (e.g.
490 // it can't tell that the macro body isn't a conditional,
491 // that the uses of parameters aren't nonexistent
492 // macros.....)
534aad8b 493 if (name == "define")
f0454224 494 throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
17f56d2a 495 if (input.atwords.count(name))
534aad8b
SM
496 session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
497
fe410f52
SM
498 macrodecl* decl = (pp1_namespace[name] = new macrodecl);
499 decl->tok = t;
534aad8b
SM
500
501 // determine if the macro takes parameters
4ac28d7e 502 bool saw_params = false;
3932c705
SM
503 t = input.scan();
504 if (t && t->type == tok_operator && t->content == "(")
4ac28d7e
SM
505 {
506 saw_params = true;
507 do
508 {
509 delete t;
510
511 t = input.scan ();
512 if (! (t && t->type == tok_identifier))
f0454224 513 throw PARSE_ERROR(_("expected identifier"), t);
47d349b1 514 decl->formal_args.push_back(t->content);
4ac28d7e
SM
515 delete t;
516
517 t = input.scan ();
518 if (t && t->type == tok_operator && t->content == ",")
519 {
520 continue;
521 }
522 else if (t && t->type == tok_operator && t->content == ")")
523 {
524 delete t;
525 t = input.scan();
526 break;
527 }
528 else
529 {
f0454224 530 throw PARSE_ERROR (_("expected ',' or ')'"), t);
4ac28d7e
SM
531 }
532 }
533 while (true);
534 }
534aad8b
SM
535
536 // (2) identify & consume macro body
3932c705 537 if (! (t && t->type == tok_operator && t->content == "%("))
4ac28d7e
SM
538 {
539 if (saw_params)
f0454224 540 throw PARSE_ERROR (_("expected '%('"), t);
4ac28d7e 541 else
f0454224 542 throw PARSE_ERROR (_("expected '%(' or '('"), t);
4ac28d7e 543 }
3932c705 544 delete t;
534aad8b 545
3932c705
SM
546 t = slurp_pp1_body (decl->body);
547 if (!t)
f0454224 548 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
3932c705 549 delete t;
534aad8b
SM
550
551 // Now loop around to look for a real token.
552 continue;
553 }
554
555 // (potential) macro invocation
556 if (t->type == tok_operator && t->content[0] == '@')
557 {
7371cd19 558 const string& name = t->content.substr(1); // strip initial '@'
534aad8b
SM
559
560 // check if name refers to a real parameter or macro
fe410f52 561 macrodecl* decl;
534aad8b
SM
562 pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
563 if (act && act->params.find(name) != act->params.end())
564 decl = act->params[name];
fe410f52
SM
565 else if (!(act && act->curr_macro->context == ctx_library)
566 && pp1_namespace.find(name) != pp1_namespace.end())
534aad8b 567 decl = pp1_namespace[name];
fe410f52
SM
568 else if (session.library_macros.find(name)
569 != session.library_macros.end())
570 decl = session.library_macros[name];
534aad8b
SM
571 else // this is an ordinary @operator
572 return t;
573
bdf7707b
JS
574 // handle macro invocation, taking ownership of t
575 pp1_activation *new_act = new pp1_activation(t, decl);
fe410f52 576 unsigned num_params = decl->formal_args.size();
534aad8b
SM
577
578 // (1a) restore parameter invocation closure
fe410f52 579 if (num_params == 0 && decl->is_closure())
534aad8b
SM
580 {
581 // NB: decl->parent_act is always safe since the
582 // parameter decl (if any) comes from an activation
583 // record which deeper in the stack than new_act.
584
585 // decl is a macro parameter which must be evaluated in
586 // the context of the original point of invocation:
fe410f52 587 new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
534aad8b
SM
588 goto expand;
589 }
590
591 // (1b) consume macro parameters (if any)
592 if (num_params == 0)
593 goto expand;
594
595 // for simplicity, we do not allow macro constructs here
596 // -- if we did, we'd have to recursively call scan_pp1()
597 t = next_pp1 ();
39566df2 598 if (! (t && t->type == tok_operator && t->content == "("))
534aad8b
SM
599 {
600 delete new_act;
f0454224 601 throw PARSE_ERROR (_NF
534aad8b 602 ("expected '(' in invocation of macro '@%s'"
f499dee5 603 " taking %d parameter",
534aad8b 604 "expected '(' in invocation of macro '@%s'"
f499dee5 605 " taking %d parameters",
52c2652f 606 num_params, name.c_str(), num_params), t);
534aad8b
SM
607 }
608
609 // XXX perhaps parse/count the full number of params,
610 // so we can say "expected x, found y params" on error?
611 for (unsigned i = 0; i < num_params; i++)
612 {
613 delete t;
614
615 // create parameter closure
fe410f52 616 string param_name = decl->formal_args[i];
534aad8b 617 pp_macrodecl* p = (new_act->params[param_name]
fe410f52
SM
618 = new pp_macrodecl);
619 p->tok = new token(*new_act->tok);
620 p->parent_act = act;
534aad8b
SM
621 // NB: *new_act->tok points to invocation, act is NULL at top level
622
623 t = slurp_pp1_param (p->body);
624
625 // check correct usage of ',' or ')'
626 if (t == 0) // hit unexpected EOF or end of macro
627 {
628 // XXX could we pop the stack and continue parsing
629 // the invocation, allowing macros to construct new
630 // invocations in piecemeal fashion??
631 const token* orig_t = new token(*new_act->tok);
632 delete new_act;
f0454224 633 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
534aad8b
SM
634 }
635 if (t->type == tok_operator && t->content == ",")
636 {
637 if (i + 1 == num_params)
638 {
639 delete new_act;
f0454224 640 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
534aad8b
SM
641 }
642 }
643 else if (t->type == tok_operator && t->content == ")")
644 {
645 if (i + 1 != num_params)
646 {
647 delete new_act;
f0454224 648 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
534aad8b
SM
649 }
650 }
651 else
652 {
653 // XXX this is, incidentally, impossible
654 delete new_act;
f0454224 655 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
534aad8b
SM
656 }
657 }
658
659 delete t;
660
661 // (2) set up macro expansion
662 expand:
663 pp1_state.push_back (new_act);
664
665 // Now loop around to look for a real token.
666 continue;
667 }
668
669 // Otherwise, we have an ordinary token.
670 return t;
671 }
672}
673
36fa6eb3 674// Consume a single macro invocation's parameters, heeding nesting
534aad8b
SM
675// brackets and stopping on an unbalanced ')' or an unbracketed ','
676// (and returning the final separator token).
677const token*
678parser::slurp_pp1_param (vector<const token*>& param)
679{
680 const token* t = 0;
681 unsigned nesting = 0;
682 do
683 {
684 t = next_pp1 ();
685
686 if (!t)
687 break;
36fa6eb3
FCE
688 // [ needed in case macro paramater is used as prefix for array-deref operation
689 if (t->type == tok_operator && (t->content == "(" || t->content == "["))
534aad8b 690 ++nesting;
36fa6eb3 691 else if (nesting && t->type == tok_operator && (t->content == ")" || t->content == "]"))
534aad8b
SM
692 --nesting;
693 else if (!nesting && t->type == tok_operator
694 && (t->content == ")" || t->content == ","))
695 break;
696 param.push_back(t);
697 }
698 while (true);
699 return t; // report ")" or "," or NULL
700}
701
702
703// Consume a macro declaration's body, heeding nested %( %) brackets.
704const token*
705parser::slurp_pp1_body (vector<const token*>& body)
706{
707 const token* t = 0;
708 unsigned nesting = 0;
709 do
710 {
711 t = next_pp1 ();
712
713 if (!t)
714 break;
715 if (t->type == tok_operator && t->content == "%(")
716 ++nesting;
717 else if (nesting && t->type == tok_operator && t->content == "%)")
718 --nesting;
719 else if (!nesting && t->type == tok_operator && t->content == "%)")
720 break;
721 body.push_back(t);
722 }
723 while (true);
724 return t; // report final "%)" or NULL
725}
726
fe410f52
SM
727// Used for parsing .stpm files.
728stapfile*
f8405ea5 729parser::parse_library_macros ()
fe410f52
SM
730{
731 stapfile* f = new stapfile;
732 input.set_current_file (f);
733
734 try
735 {
ed891cf3 736 const token* t = scan_pp ();
fe410f52
SM
737
738 // Currently we only take objection to macro invocations if they
739 // produce a non-whitespace token after being expanded.
740
741 // XXX should we prevent macro invocations even if they expand to empty??
742
743 if (t != 0)
ed891cf3 744 throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name.c_str()), t);
fe410f52
SM
745
746 // We need to first check whether *any* of the macros are duplicates,
747 // then commit to including the entire file in the global namespace
748 // (or not). Yuck.
749 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
750 it != pp1_namespace.end(); it++)
751 {
752 string name = it->first;
753
754 if (session.library_macros.find(name) != session.library_macros.end())
755 {
f0454224
JL
756 parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
757 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
78ab2de3 758 print_error (er);
fe410f52 759
78ab2de3 760 delete er.chain;
fe410f52
SM
761 delete f;
762 return 0;
763 }
764 }
765
766 }
767 catch (const parse_error& pe)
768 {
7ac01ea0 769 print_error (pe, errs_as_warnings);
fe410f52
SM
770 delete f;
771 return 0;
772 }
773
774 // If no errors, include the entire file. Note how this is outside
775 // of the try-catch block -- no errors possible.
776 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
777 it != pp1_namespace.end(); it++)
778 {
779 string name = it->first;
780
781 session.library_macros[name] = it->second;
782 session.library_macros[name]->context = ctx_library;
fe410f52
SM
783 }
784
785 return f;
786}
787
534aad8b
SM
788// Second pass - preprocessor conditional expansion.
789//
177a8ead 790// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
791// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
792// or: arch COMPARISON-OP "arch-string"
db135493 793// or: systemtap_v COMPARISON-OP "version-string"
2e6dd9d0 794// or: systemtap_privilege COMPARISON-OP "privilege-string"
561079c8 795// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 796// or: CONFIG_foo COMPARISON-OP number
4227f98d 797// or: CONFIG_foo COMPARISON-OP CONFIG_bar
5811366a
FCE
798// or: "string1" COMPARISON-OP "string2"
799// or: number1 COMPARISON-OP number2
44ce8ed5 800// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
801//
802// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 803// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 804// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
177a8ead
FCE
805//
806// Up to an entire %( ... %) expression is processed by a single call
807// to this function. Tokens included by any nested conditions are
808// enqueued in a private vector.
809
810bool eval_pp_conditional (systemtap_session& s,
811 const token* l, const token* op, const token* r)
812{
44ce8ed5 813 if (l->type == tok_identifier && (l->content == "kernel_v" ||
db135493
FCE
814 l->content == "kernel_vr" ||
815 l->content == "systemtap_v"))
44ce8ed5 816 {
db135493 817 if (! (r->type == tok_string))
f0454224 818 throw PARSE_ERROR (_("expected string literal"), r);
db135493 819
44ce8ed5 820 string target_kernel_vr = s.kernel_release;
197a4d62 821 string target_kernel_v = s.kernel_base_release;
db135493 822 string target;
dff50e09 823
db135493
FCE
824 if (l->content == "kernel_v") target = target_kernel_v;
825 else if (l->content == "kernel_vr") target = target_kernel_vr;
826 else if (l->content == "systemtap_v") target = s.compatible;
827 else assert (0);
7a468d68 828
47d349b1 829 string query = r->content;
7a468d68
FCE
830 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
831
44ce8ed5
FCE
832 // collect acceptable strverscmp results.
833 int rvc_ok1, rvc_ok2;
7a468d68 834 bool wc_ok = false;
44ce8ed5
FCE
835 if (op->type == tok_operator && op->content == "<=")
836 { rvc_ok1 = -1; rvc_ok2 = 0; }
837 else if (op->type == tok_operator && op->content == ">=")
838 { rvc_ok1 = 1; rvc_ok2 = 0; }
839 else if (op->type == tok_operator && op->content == "<")
840 { rvc_ok1 = -1; rvc_ok2 = -1; }
841 else if (op->type == tok_operator && op->content == ">")
842 { rvc_ok1 = 1; rvc_ok2 = 1; }
843 else if (op->type == tok_operator && op->content == "==")
7a468d68 844 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 845 else if (op->type == tok_operator && op->content == "!=")
7a468d68 846 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5 847 else
f0454224 848 throw PARSE_ERROR (_("expected comparison operator"), op);
7a468d68
FCE
849
850 if ((!wc_ok) && rhs_wildcard)
f0454224 851 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
7a468d68
FCE
852
853 if (rhs_wildcard)
854 {
855 int rvc_result = fnmatch (query.c_str(), target.c_str(),
856 FNM_NOESCAPE); // spooky
857 bool badness = (rvc_result == 0) ^ (op->content == "==");
858 return !badness;
859 }
860 else
861 {
862 int rvc_result = strverscmp (target.c_str(), query.c_str());
863 // normalize rvc_result
864 if (rvc_result < 0) rvc_result = -1;
865 if (rvc_result > 0) rvc_result = 1;
866 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
867 }
44ce8ed5 868 }
2e6dd9d0
SM
869 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
870 {
871 string target_privilege =
cba5b802
SM
872 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
873 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
874 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
2e6dd9d0
SM
875 : "none"; /* should be impossible -- s.privilege always one of above */
876 assert(target_privilege != "none");
877
878 if (! (r->type == tok_string))
f0454224 879 throw PARSE_ERROR (_("expected string literal"), r);
47d349b1 880 string query_privilege = r->content;
2e6dd9d0
SM
881
882 bool nomatch = (target_privilege != query_privilege);
883
884 bool result;
885 if (op->type == tok_operator && op->content == "==")
886 result = !nomatch;
887 else if (op->type == tok_operator && op->content == "!=")
888 result = nomatch;
889 else
f0454224 890 throw PARSE_ERROR (_("expected '==' or '!='"), op);
cba5b802 891 /* XXX perhaps allow <= >= and similar comparisons */
2e6dd9d0
SM
892
893 return result;
894 }
92585d32
PK
895 else if (l->type == tok_identifier && l->content == "guru_mode")
896 {
897 if (! (r->type == tok_number))
898 throw PARSE_ERROR (_("expected number"), r);
899 int64_t lhs = (int64_t) s.guru_mode;
47d349b1 900 int64_t rhs = lex_cast<int64_t>(r->content);
92585d32
PK
901 if (!((rhs == 0)||(rhs == 1)))
902 throw PARSE_ERROR (_("expected 0 or 1"), op);
903 if (!((op->type == tok_operator && op->content == "==") ||
904 (op->type == tok_operator && op->content == "!=")))
905 throw PARSE_ERROR (_("expected '==' or '!='"), op);
906
907 return eval_comparison (lhs, op, rhs);
908 }
44ce8ed5
FCE
909 else if (l->type == tok_identifier && l->content == "arch")
910 {
911 string target_architecture = s.architecture;
912 if (! (r->type == tok_string))
f0454224 913 throw PARSE_ERROR (_("expected string literal"), r);
47d349b1 914 string query_architecture = r->content;
dff50e09 915
7a468d68
FCE
916 int nomatch = fnmatch (query_architecture.c_str(),
917 target_architecture.c_str(),
918 FNM_NOESCAPE); // still spooky
919
561079c8
FCE
920 bool result;
921 if (op->type == tok_operator && op->content == "==")
922 result = !nomatch;
923 else if (op->type == tok_operator && op->content == "!=")
924 result = nomatch;
925 else
f0454224 926 throw PARSE_ERROR (_("expected '==' or '!='"), op);
561079c8 927
d9677d7b
DS
928 return result;
929 }
930 else if (l->type == tok_identifier && l->content == "runtime")
931 {
932 if (! (r->type == tok_string))
f0454224 933 throw PARSE_ERROR (_("expected string literal"), r);
d9677d7b 934
47d349b1 935 string query_runtime = r->content;
d9677d7b
DS
936 string target_runtime;
937
938 target_runtime = (s.runtime_mode == systemtap_session::dyninst_runtime
939 ? "dyninst" : "kernel");
940 int nomatch = fnmatch (query_runtime.c_str(),
941 target_runtime.c_str(),
942 FNM_NOESCAPE); // still spooky
943
944 bool result;
945 if (op->type == tok_operator && op->content == "==")
946 result = !nomatch;
947 else if (op->type == tok_operator && op->content == "!=")
948 result = nomatch;
949 else
f0454224 950 throw PARSE_ERROR (_("expected '==' or '!='"), op);
d9677d7b 951
561079c8
FCE
952 return result;
953 }
f5a34c5a 954 else if (l->type == tok_identifier && l->content.starts_with("CONFIG_"))
561079c8 955 {
717a457b
MW
956 if (r->type == tok_string)
957 {
47d349b1
FCE
958 string lhs = s.kernel_config[l->content]; // may be empty
959 string rhs = r->content;
561079c8 960
717a457b 961 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 962
717a457b
MW
963 bool result;
964 if (op->type == tok_operator && op->content == "==")
965 result = !nomatch;
966 else if (op->type == tok_operator && op->content == "!=")
967 result = nomatch;
968 else
f0454224 969 throw PARSE_ERROR (_("expected '==' or '!='"), op);
dff50e09 970
717a457b
MW
971 return result;
972 }
973 else if (r->type == tok_number)
974 {
7371cd19
JS
975 const string& lhs_string = s.kernel_config[l->content];
976 const char* startp = lhs_string.c_str ();
717a457b
MW
977 char* endp = (char*) startp;
978 errno = 0;
979 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
980 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
f0454224 981 throw PARSE_ERROR ("Config option value not a number", l);
717a457b 982
47d349b1 983 int64_t rhs = lex_cast<int64_t>(r->content);
717a457b
MW
984 return eval_comparison (lhs, op, rhs);
985 }
4227f98d 986 else if (r->type == tok_identifier
f5a34c5a 987 && r->content.starts_with( "CONFIG_"))
4227f98d
MW
988 {
989 // First try to convert both to numbers,
990 // otherwise threat both as strings.
7371cd19
JS
991 const string& lhs_string = s.kernel_config[l->content];
992 const string& rhs_string = s.kernel_config[r->content];
993 const char* startp = lhs_string.c_str ();
4227f98d
MW
994 char* endp = (char*) startp;
995 errno = 0;
996 int64_t val = (int64_t) strtoll (startp, & endp, 0);
997 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
998 {
999 int64_t lhs = val;
7371cd19 1000 startp = rhs_string.c_str ();
4227f98d
MW
1001 endp = (char*) startp;
1002 errno = 0;
1003 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
1004 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1005 return eval_comparison (lhs, op, rhs);
1006 }
1007
7371cd19 1008 return eval_comparison (lhs_string, op, rhs_string);
4227f98d 1009 }
717a457b 1010 else
f0454224 1011 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
dff50e09 1012 }
c434ec7e 1013 else if (l->type == tok_string && r->type == tok_string)
5811366a 1014 {
47d349b1
FCE
1015 string lhs = l->content;
1016 string rhs = r->content;
c434ec7e
FCE
1017 return eval_comparison (lhs, op, rhs);
1018 // NB: no wildcarding option here
1019 }
1020 else if (l->type == tok_number && r->type == tok_number)
1021 {
47d349b1
FCE
1022 int64_t lhs = lex_cast<int64_t>(l->content);
1023 int64_t rhs = lex_cast<int64_t>(r->content);
c434ec7e 1024 return eval_comparison (lhs, op, rhs);
7a468d68 1025 // NB: no wildcarding option here
5811366a
FCE
1026 }
1027 else if (l->type == tok_string && r->type == tok_number
1028 && op->type == tok_operator)
f0454224 1029 throw PARSE_ERROR (_("expected string literal as right value"), r);
5811366a
FCE
1030 else if (l->type == tok_number && r->type == tok_string
1031 && op->type == tok_operator)
f0454224 1032 throw PARSE_ERROR (_("expected number literal as right value"), r);
c434ec7e 1033
177a8ead 1034 else
f0454224 1035 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
d9677d7b
DS
1036 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1037 " comparison between strings or integers"), l);
177a8ead
FCE
1038}
1039
1040
5811366a 1041// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 1042const token*
b5477cd9 1043parser::scan_pp ()
177a8ead
FCE
1044{
1045 while (true)
1046 {
e92f2566
JS
1047 pp_state_t pp = PP_NONE;
1048 if (!pp_state.empty())
1049 pp = pp_state.back().second;
1050
1051 const token* t = 0;
1052 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1053 t = skip_pp ();
1054 else
534aad8b 1055 t = scan_pp1 ();
e92f2566
JS
1056
1057 if (t == 0) // EOF
177a8ead 1058 {
e92f2566
JS
1059 if (pp != PP_NONE)
1060 {
1061 t = pp_state.back().first;
1062 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
ce0f6648 1063 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
f0454224 1064 throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
e92f2566 1065 }
177a8ead
FCE
1066 return t;
1067 }
1068
e92f2566
JS
1069 // misplaced preprocessor "then"
1070 if (t->type == tok_operator && t->content == "%?")
f0454224 1071 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566
JS
1072
1073 // preprocessor "else"
1074 if (t->type == tok_operator && t->content == "%:")
1075 {
1076 if (pp == PP_NONE)
f0454224 1077 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566 1078 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
f0454224 1079 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1d94e4e5
SM
1080 // XXX: here and elsewhere, error cascades might be avoided
1081 // by dropping tokens until we reach the closing %)
e92f2566
JS
1082
1083 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1084 PP_SKIP_ELSE : PP_KEEP_ELSE;
1085 delete t;
1086 continue;
1087 }
1088
1089 // preprocessor close
1090 if (t->type == tok_operator && t->content == "%)")
1091 {
1092 if (pp == PP_NONE)
f0454224 1093 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566 1094 delete pp_state.back().first;
a07a2c28 1095 delete t; //this is the closing bracket
e92f2566
JS
1096 pp_state.pop_back();
1097 continue;
1098 }
dff50e09 1099
177a8ead
FCE
1100 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1101 return t;
1102
1103 // We have a %( - it's time to throw a preprocessing party!
1104
2d7881bf
PP
1105 bool result = false;
1106 bool and_result = true;
1107 const token *n = NULL;
1108 do {
1109 const token *l, *op, *r;
534aad8b
SM
1110 l = scan_pp1 ();
1111 op = scan_pp1 ();
1112 r = scan_pp1 ();
2d7881bf 1113 if (l == 0 || op == 0 || r == 0)
f0454224 1114 throw PARSE_ERROR (_("incomplete condition after '%('"), t);
2d7881bf
PP
1115 // NB: consider generalizing to consume all tokens until %?, and
1116 // passing that as a vector to an evaluator.
1117
1118 // Do not evaluate the condition if we haven't expanded everything.
1119 // This may occur when having several recursive conditionals.
1120 and_result &= eval_pp_conditional (session, l, op, r);
a07a2c28
LB
1121 if(l->content=="systemtap_v")
1122 systemtap_v_seen=r;
1123
1124 else
1125 delete r;
1126
2d7881bf
PP
1127 delete l;
1128 delete op;
2d7881bf
PP
1129 delete n;
1130
534aad8b 1131 n = scan_pp1 ();
2d7881bf
PP
1132 if (n && n->type == tok_operator && n->content == "&&")
1133 continue;
1134 result |= and_result;
1135 and_result = true;
1136 if (! (n && n->type == tok_operator && n->content == "||"))
1137 break;
1138 } while (true);
3f847830
FCE
1139
1140 /*
1141 clog << "PP eval (" << *t << ") == " << result << endl;
1142 */
1143
e92f2566 1144 const token *m = n;
177a8ead 1145 if (! (m && m->type == tok_operator && m->content == "%?"))
f0454224 1146 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
70c743d8 1147 delete m; // "%?"
177a8ead 1148
e92f2566
JS
1149 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1150 pp_state.push_back (make_pair (t, pp));
3f847830 1151
e92f2566
JS
1152 // Now loop around to look for a real token.
1153 }
1154}
3f847830 1155
3f847830 1156
e92f2566
JS
1157// Skip over tokens and any errors, heeding
1158// only nested preprocessor starts and ends.
1159const token*
1160parser::skip_pp ()
1161{
1162 const token* t = 0;
1163 unsigned nesting = 0;
1164 do
1165 {
1166 try
1167 {
ed891cf3 1168 t = scan_pp1 (true);
177a8ead 1169 }
e92f2566 1170 catch (const parse_error &e)
70c743d8 1171 {
e92f2566 1172 continue;
70c743d8 1173 }
e92f2566
JS
1174 if (!t)
1175 break;
1176 if (t->type == tok_operator && t->content == "%(")
1177 ++nesting;
1178 else if (nesting && t->type == tok_operator && t->content == "%)")
1179 --nesting;
1180 else if (!nesting && t->type == tok_operator &&
1181 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1182 break;
1183 delete t;
177a8ead 1184 }
e92f2566
JS
1185 while (true);
1186 return t;
177a8ead
FCE
1187}
1188
1189
2f1a1aea 1190const token*
b5477cd9 1191parser::next ()
2f1a1aea
FCE
1192{
1193 if (! next_t)
b5477cd9 1194 next_t = scan_pp ();
2f1a1aea 1195 if (! next_t)
f0454224 1196 throw PARSE_ERROR (_("unexpected end-of-file"));
2f1a1aea 1197
2f1a1aea
FCE
1198 last_t = next_t;
1199 // advance by zeroing next_t
1200 next_t = 0;
1201 return last_t;
1202}
1203
1204
1205const token*
b5477cd9 1206parser::peek ()
2f1a1aea
FCE
1207{
1208 if (! next_t)
b5477cd9 1209 next_t = scan_pp ();
2f1a1aea
FCE
1210
1211 // don't advance by zeroing next_t
1212 last_t = next_t;
1213 return next_t;
1214}
1215
1216
731a5359
MW
1217void
1218parser::swallow ()
1219{
1220 // can only swallow something last peeked or nexted token.
1221 assert (last_t != 0);
1222 delete last_t;
1223 // advance by zeroing next_t
1224 last_t = next_t = 0;
1225}
1226
1227
d7f3e0c5
GH
1228static inline bool
1229tok_is(token const * t, token_type tt, string const & expected)
1230{
1231 return t && t->type == tt && t->content == expected;
1232}
1233
1234
731a5359 1235void
d7f3e0c5
GH
1236parser::expect_known (token_type tt, string const & expected)
1237{
1238 const token *t = next();
57b73400 1239 if (! (t && t->type == tt && t->content == expected))
f0454224 1240 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
731a5359 1241 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1242}
1243
1244
a3e980f9 1245void
47d349b1 1246parser::expect_unknown (token_type tt, interned_string & target)
a3e980f9
FCE
1247{
1248 const token *t = next();
1249 if (!(t && t->type == tt))
1250 throw PARSE_ERROR (_("expected ") + tt2str(tt));
1251 target = t->content;
1252 swallow (); // We are done with it, content was copied.
1253}
1254
d7f3e0c5 1255
a3e980f9 1256void
47d349b1 1257parser::expect_unknown2 (token_type tt1, token_type tt2, interned_string & target)
a3e980f9
FCE
1258{
1259 const token *t = next();
1260 if (!(t && (t->type == tt1 || t->type == tt2)))
1261 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1262 target = t->content;
1263 swallow (); // We are done with it, content was copied.
1264}
1265
493ee224 1266
731a5359 1267void
aa389a19 1268parser::expect_op (string const & expected)
d7f3e0c5 1269{
731a5359 1270 expect_known (tok_operator, expected);
d7f3e0c5
GH
1271}
1272
1efdc9a9 1273interned_string
891b96e6 1274parser::expect_op_any (initializer_list<const char*> expected)
1efdc9a9
FL
1275{
1276 const token *t = next();
891b96e6
JS
1277 if (t && t->type == tok_operator)
1278 for (auto it = expected.begin(); it != expected.end(); ++it)
1279 if (t->content == *it)
1efdc9a9 1280 {
891b96e6
JS
1281 interned_string found = t->content;
1282 swallow (); // We are done with it, content was copied.
1283 return found;
1efdc9a9 1284 }
891b96e6
JS
1285
1286 string msg;
1287 for (auto it = expected.begin(); it != expected.end(); ++it)
1288 {
1289 if (it != expected.begin())
1efdc9a9
FL
1290 msg.append(" ");
1291 msg.append(*it);
1292 }
891b96e6 1293 throw PARSE_ERROR (_F("expected one of '%s'", msg.c_str()));
1efdc9a9 1294}
d7f3e0c5 1295
731a5359 1296void
aa389a19 1297parser::expect_kw (string const & expected)
d7f3e0c5 1298{
731a5359 1299 expect_known (tok_keyword, expected);
d7f3e0c5
GH
1300}
1301
dff50e09 1302const token*
aa389a19 1303parser::expect_kw_token (string const & expected)
731a5359
MW
1304{
1305 const token *t = next();
1306 if (! (t && t->type == tok_keyword && t->content == expected))
f0454224 1307 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
731a5359
MW
1308 return t;
1309}
1310
1311void
e38723d2 1312parser::expect_number (int64_t & value)
57b73400 1313{
e38723d2
MH
1314 bool neg = false;
1315 const token *t = next();
1316 if (t->type == tok_operator && t->content == "-")
1317 {
1318 neg = true;
731a5359 1319 swallow ();
e38723d2
MH
1320 t = next ();
1321 }
1322 if (!(t && t->type == tok_number))
f0454224 1323 throw PARSE_ERROR (_("expected number"));
e38723d2 1324
7371cd19
JS
1325 const string& s = t->content;
1326 const char* startp = s.c_str ();
e38723d2
MH
1327 char* endp = (char*) startp;
1328
1329 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1330 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1331 // since the lexer only gives us positive digit strings, but we'll
1332 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1333 errno = 0;
1334 value = (int64_t) strtoull (startp, & endp, 0);
1335 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1336 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1337 || (unsigned long long) value > 18446744073709551615ULL
1338 || value < -9223372036854775807LL-1)
f0454224 1339 throw PARSE_ERROR (_("number invalid or out of range"));
dff50e09 1340
e38723d2
MH
1341 if (neg)
1342 value = -value;
1343
731a5359 1344 swallow (); // We are done with it, content was parsed and copied into value.
57b73400
GH
1345}
1346
d7f3e0c5 1347
dff50e09 1348const token*
b1f2b0e8 1349parser::expect_ident_or_atword (interned_string & target)
d7f3e0c5 1350{
06219d6f
SM
1351 const token *t = next();
1352
1353 // accept identifiers and operators beginning in '@':
1354 if (!t || (t->type != tok_identifier
1355 && (t->type != tok_operator || t->content[0] != '@')))
1356 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1357 // so the message is accurate, but keep an eye out in the future:
f0454224 1358 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
06219d6f 1359
47d349b1 1360 target = t->content;
06219d6f 1361 return t;
d7f3e0c5
GH
1362}
1363
1364
731a5359 1365void
b1f2b0e8 1366parser::expect_ident_or_keyword (interned_string & target)
493ee224 1367{
731a5359 1368 expect_unknown2 (tok_identifier, tok_keyword, target);
493ee224
DS
1369}
1370
1371
dff50e09 1372bool
aa389a19 1373parser::peek_op (string const & op)
d7f3e0c5
GH
1374{
1375 return tok_is (peek(), tok_operator, op);
1376}
1377
1378
dff50e09 1379bool
aa389a19 1380parser::peek_kw (string const & kw)
d7f3e0c5
GH
1381{
1382 return tok_is (peek(), tok_identifier, kw);
1383}
1384
1385
1386
f086fe02
MC
1387lexer::lexer (istream& input, const string& in, systemtap_session& s, bool cc):
1388 ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc),
aa389a19
FCE
1389 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1390 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1391 cursor_column (1), session(s), current_file (0), current_token_chain (0)
eacb10ce 1392{
aa389a19 1393 getline(input, input_contents, '\0');
73ddea86 1394
4915b3e4
AJ
1395 input_pointer = input_contents.data();
1396 input_end = input_contents.data() + input_contents.size();
66c7d4c1
JS
1397
1398 if (keywords.empty())
1399 {
3a7ec735
FCE
1400 // NB: adding new keywords is highly disruptive to the language,
1401 // in particular to existing scripts that could be suddenly
1402 // broken. If done at all, it has to be s.compatible-sensitive,
1403 // and broadly advertised.
66c7d4c1
JS
1404 keywords.insert("probe");
1405 keywords.insert("global");
38bf68a8
MC
1406 if (has_version("3.0"))
1407 keywords.insert("private");
66c7d4c1
JS
1408 keywords.insert("function");
1409 keywords.insert("if");
1410 keywords.insert("else");
1411 keywords.insert("for");
1412 keywords.insert("foreach");
1413 keywords.insert("in");
1414 keywords.insert("limit");
1415 keywords.insert("return");
1416 keywords.insert("delete");
1417 keywords.insert("while");
1418 keywords.insert("break");
1419 keywords.insert("continue");
1420 keywords.insert("next");
1421 keywords.insert("string");
1422 keywords.insert("long");
f4fe2e93
FCE
1423 keywords.insert("try");
1424 keywords.insert("catch");
66c7d4c1 1425 }
2524d1fd
SM
1426
1427 if (atwords.empty())
1428 {
1429 // NB: adding new @words is mildly disruptive to existing
1430 // scripts that define macros with the same name, but not
1431 // really. The user will merely receive a warning that they are
1432 // redefining an existing operator.
17f56d2a
JS
1433
1434 // These are inserted without the actual '@', so we can directly check
1435 // proposed macro names without building a string with that prefix.
1436 atwords.insert("cast");
1437 atwords.insert("defined");
0a7eb12d
MC
1438 if (has_version("3.1"))
1439 atwords.insert("const");
17f56d2a
JS
1440 atwords.insert("entry");
1441 atwords.insert("perf");
1442 atwords.insert("var");
1443 atwords.insert("avg");
1444 atwords.insert("count");
1445 atwords.insert("sum");
1446 atwords.insert("min");
1447 atwords.insert("max");
1448 atwords.insert("hist_linear");
1449 atwords.insert("hist_log");
2524d1fd 1450 }
eacb10ce 1451}
2f1a1aea 1452
85c97fc2 1453unordered_set<string> lexer::atwords;
66c7d4c1 1454
1b1b4ceb
RA
1455void
1456lexer::set_current_file (stapfile* f)
1457{
1458 current_file = f;
2203b032
JS
1459 if (f)
1460 {
47d349b1 1461 f->file_contents = input_contents;
2203b032
JS
1462 f->name = input_name;
1463 }
1b1b4ceb 1464}
bb2e3076 1465
101b0805
JS
1466void
1467lexer::set_current_token_chain (const token* tok)
1468{
1469 current_token_chain = tok;
1470}
1471
bb2e3076
FCE
1472int
1473lexer::input_peek (unsigned n)
1474{
aa389a19
FCE
1475 if (input_pointer + n >= input_end)
1476 return -1; // EOF
1477 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
1478}
1479
1480
f8405ea5
JS
1481bool
1482lexer::has_version (const char* v) const
1483{
1484 return check_compatible
1485 ? strverscmp(session.compatible.c_str(), v) >= 0
1486 : true;
1487}
1488
dff50e09 1489int
2f1a1aea
FCE
1490lexer::input_get ()
1491{
66c7d4c1 1492 int c = input_peek();
bb2e3076
FCE
1493 if (c < 0) return c; // EOF
1494
aa389a19 1495 ++input_pointer;
66c7d4c1 1496
aa389a19
FCE
1497 if (cursor_suspend_count)
1498 {
1499 // Track effect of input_put: preserve previous cursor/line_column
1500 // until all of its characters are consumed.
1501 if (--cursor_suspend_count == 0)
1502 {
1503 cursor_line = cursor_suspend_line;
1504 cursor_column = cursor_suspend_column;
1505 }
1506 }
3f99432c 1507 else
2f1a1aea 1508 {
3f99432c
FCE
1509 // update source cursor
1510 if (c == '\n')
1511 {
1512 cursor_line ++;
1513 cursor_column = 1;
1514 }
1515 else
1516 cursor_column ++;
2f1a1aea 1517 }
2f1a1aea 1518
eacb10ce 1519 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
1520 return c;
1521}
1522
1523
3f99432c 1524void
aa389a19 1525lexer::input_put (const string& chars, const token* t)
3f99432c 1526{
aa389a19
FCE
1527 size_t pos = input_pointer - input_contents.data();
1528 // clog << "[put:" << chars << " @" << pos << "]";
1529 input_contents.insert (pos, chars);
1530 cursor_suspend_count += chars.size();
1531 cursor_suspend_line = cursor_line;
1532 cursor_suspend_column = cursor_column;
1533 cursor_line = t->location.line;
1534 cursor_column = t->location.column;
1535 input_pointer = input_contents.data() + pos;
1536 input_end = input_contents.data() + input_contents.size();
46b5cfb2
AJ
1537}
1538
3f99432c 1539
2f1a1aea 1540token*
b5477cd9 1541lexer::scan ()
2f1a1aea 1542{
fee28e5c 1543 ate_comment = false; // reset for each new token
b5477cd9 1544 ate_whitespace = false; // reset for each new token
534aad8b
SM
1545
1546 // XXX be very sure to restore old_saw_tokens if we return without a token:
1547 bool old_saw_tokens = saw_tokens;
1548 saw_tokens = true;
1549
2f1a1aea 1550 token* n = new token;
aa389a19 1551 string token_str; // accumulate here instead of by incremental interning
2203b032 1552 n->location.file = current_file;
101b0805 1553 n->chain = current_token_chain;
2f1a1aea 1554
9300f661 1555skip:
aa389a19 1556 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
1557 n->location.line = cursor_line;
1558 n->location.column = cursor_column;
1559
1560 int c = input_get();
3f99432c 1561 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
1562 if (c < 0)
1563 {
1564 delete n;
534aad8b 1565 saw_tokens = old_saw_tokens;
2f1a1aea
FCE
1566 return 0;
1567 }
1568
1569 if (isspace (c))
b5477cd9
SM
1570 {
1571 ate_whitespace = true;
1572 goto skip;
1573 }
2f1a1aea 1574
66c7d4c1
JS
1575 int c2 = input_peek ();
1576
3f99432c
FCE
1577 // Paste command line arguments as character streams into
1578 // the beginning of a token. $1..$999 go through as raw
1579 // characters; @1..@999 are quoted/escaped as strings.
1580 // $# and @# expand to the number of arguments, similarly
1581 // raw or quoted.
9300f661 1582 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 1583 {
aa389a19
FCE
1584 token_str.push_back (c);
1585 token_str.push_back (c2);
3f99432c 1586 input_get(); // swallow '#'
46b5cfb2 1587
9300f661 1588 if (suspended)
16fc963f 1589 {
10e7c19d 1590 n->make_junk(tok_junk_nested_arg);
16fc963f
SM
1591 return n;
1592 }
aa389a19
FCE
1593 size_t num_args = session.args.size ();
1594 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1595 token_str.clear();
9300f661 1596 goto skip;
3f99432c 1597 }
9300f661 1598 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c
FCE
1599 {
1600 unsigned idx = 0;
aa389a19 1601 token_str.push_back (c);
3f99432c
FCE
1602 do
1603 {
1604 input_get ();
aa389a19 1605 token_str.push_back (c2);
3f99432c
FCE
1606 idx = (idx * 10) + (c2 - '0');
1607 c2 = input_peek ();
1608 } while (c2 > 0 &&
dff50e09 1609 isdigit (c2) &&
3f99432c 1610 idx <= session.args.size()); // prevent overflow
16fc963f
SM
1611 if (suspended)
1612 {
10e7c19d 1613 n->make_junk(tok_junk_nested_arg);
16fc963f
SM
1614 return n;
1615 }
3f99432c
FCE
1616 if (idx == 0 ||
1617 idx-1 >= session.args.size())
16fc963f 1618 {
10e7c19d 1619 n->make_junk(tok_junk_invalid_arg);
16fc963f
SM
1620 return n;
1621 }
aa389a19
FCE
1622 const string& arg = session.args[idx-1];
1623 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1624 token_str.clear();
9300f661 1625 goto skip;
3f99432c
FCE
1626 }
1627
b5477cd9 1628 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
2f1a1aea 1629 {
aa389a19 1630 token_str = (char) c;
b5477cd9 1631 while (isalnum (c2) || c2 == '_' || c2 == '$')
2f1a1aea 1632 {
3f99432c 1633 input_get ();
aa389a19 1634 token_str.push_back (c2);
3f99432c 1635 c2 = input_peek ();
6e213f58 1636 }
5775f11f 1637 n->content = token_str;
213bee8f 1638
5775f11f 1639 if (n->content[0] == '@')
dd90d565 1640 // makes it easier to detect illegal use of @words:
06219d6f 1641 n->type = tok_operator;
5775f11f
JS
1642 else if (keywords.count(n->content))
1643 n->type = tok_keyword;
1644 else
1645 n->type = tok_identifier;
dff50e09 1646
2f1a1aea
FCE
1647 return n;
1648 }
1649
3a20432b 1650 else if (isdigit (c)) // positive literal
2f1a1aea 1651 {
2f1a1aea 1652 n->type = tok_number;
aa389a19 1653 token_str = (char) c;
9c0c0e46 1654
66c7d4c1 1655 while (isalnum (c2))
2f1a1aea 1656 {
9c0c0e46
FCE
1657 // NB: isalnum is very permissive. We rely on strtol, called in
1658 // parser::parse_literal below, to confirm that the number string
1659 // is correctly formatted and in range.
1660
66c7d4c1 1661 input_get ();
aa389a19 1662 token_str.push_back (c2);
66c7d4c1 1663 c2 = input_peek ();
2f1a1aea 1664 }
aa389a19 1665
47d349b1 1666 n->content = token_str;
2f1a1aea
FCE
1667 return n;
1668 }
1669
1670 else if (c == '\"')
1671 {
1672 n->type = tok_string;
1673 while (1)
1674 {
1675 c = input_get ();
1676
3f99432c 1677 if (c < 0 || c == '\n')
2f1a1aea 1678 {
10e7c19d 1679 n->make_junk(tok_junk_unclosed_quote);
16fc963f 1680 return n;
2f1a1aea
FCE
1681 }
1682 if (c == '\"') // closing double-quotes
1683 break;
3f99432c 1684 else if (c == '\\') // see also input_put
dff50e09 1685 {
aa389a19
FCE
1686 c = input_get();
1687 switch (c)
7d46afb8 1688 {
ef8a6134 1689 case 'x':
f8405ea5 1690 if (!has_version("2.3"))
ef8a6134 1691 goto the_default;
c92d3b42 1692 /* FALLTHROUGH */
7d46afb8
GH
1693 case 'a':
1694 case 'b':
1695 case 't':
1696 case 'n':
1697 case 'v':
1698 case 'f':
1699 case 'r':
f03954fd 1700 case '0' ... '7': // NB: need only match the first digit
7d46afb8 1701 case '\\':
7d46afb8 1702 // Pass these escapes through to the string value
dff50e09 1703 // being parsed; it will be emitted into a C literal.
c7c8d469
FCE
1704 // XXX: PR13371: perhaps we should evaluate them here
1705 // (and re-quote them during translate.cxx emission).
aa389a19 1706 token_str.push_back ('\\');
7d46afb8 1707
3f99432c 1708 // fall through
ef8a6134 1709 default: the_default:
aa389a19
FCE
1710 token_str.push_back (c);
1711 break;
7d46afb8 1712 }
2f1a1aea
FCE
1713 }
1714 else
aa389a19 1715 token_str.push_back (c);
2f1a1aea 1716 }
47d349b1 1717 n->content = token_str;
2f1a1aea
FCE
1718 return n;
1719 }
1720
1721 else if (ispunct (c))
1722 {
bb2e3076 1723 int c3 = input_peek (1);
2f1a1aea 1724
3a20432b
FCE
1725 // NB: if we were to recognize negative numeric literals here,
1726 // we'd introduce another grammar ambiguity:
1727 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1728 // instead of tok_number(1) tok_operator('-') tok_number(1)
1729
66c7d4c1 1730 if (c == '#') // shell comment
2f1a1aea
FCE
1731 {
1732 unsigned this_line = cursor_line;
bb2e3076
FCE
1733 do { c = input_get (); }
1734 while (c >= 0 && cursor_line == this_line);
fee28e5c 1735 ate_comment = true;
b5477cd9 1736 ate_whitespace = true;
2f1a1aea
FCE
1737 goto skip;
1738 }
66c7d4c1 1739 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
1740 {
1741 unsigned this_line = cursor_line;
bb2e3076
FCE
1742 do { c = input_get (); }
1743 while (c >= 0 && cursor_line == this_line);
fee28e5c 1744 ate_comment = true;
b5477cd9 1745 ate_whitespace = true;
63a7c90e
FCE
1746 goto skip;
1747 }
1748 else if (c == '/' && c2 == '*') // C comment
1749 {
66c7d4c1
JS
1750 (void) input_get (); // swallow '*' already in c2
1751 c = input_get ();
63a7c90e 1752 c2 = input_get ();
bb2e3076 1753 while (c2 >= 0)
63a7c90e 1754 {
66c7d4c1
JS
1755 if (c == '*' && c2 == '/')
1756 break;
63a7c90e
FCE
1757 c = c2;
1758 c2 = input_get ();
63a7c90e 1759 }
fee28e5c 1760 ate_comment = true;
b5477cd9 1761 ate_whitespace = true;
bb2e3076 1762 goto skip;
63a7c90e 1763 }
54dfabe9
FCE
1764 else if (c == '%' && c2 == '{') // embedded code
1765 {
1766 n->type = tok_embedded;
1767 (void) input_get (); // swallow '{' already in c2
66c7d4c1 1768 c = input_get ();
aa389a19 1769 c2 = input_get ();
66c7d4c1 1770 while (c2 >= 0)
54dfabe9 1771 {
66c7d4c1 1772 if (c == '%' && c2 == '}')
46b5cfb2 1773 {
47d349b1 1774 n->content = token_str;
46b5cfb2
AJ
1775 return n;
1776 }
ebbf9df4
FCE
1777 if (c == '}' && c2 == '%') // possible typo
1778 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
aa389a19
FCE
1779 token_str.push_back (c);
1780 c = c2;
1781 c2 = input_get();
54dfabe9 1782 }
72cdb9cd 1783
10e7c19d 1784 n->make_junk(tok_junk_unclosed_embedded);
46b5cfb2 1785 return n;
54dfabe9 1786 }
2f1a1aea 1787
bb2e3076
FCE
1788 // We're committed to recognizing at least the first character
1789 // as an operator.
2f1a1aea 1790 n->type = tok_operator;
aa389a19 1791 token_str = (char) c;
2f1a1aea 1792
bb2e3076 1793 // match all valid operators, in decreasing size order
66c7d4c1
JS
1794 if ((c == '<' && c2 == '<' && c3 == '<') ||
1795 (c == '<' && c2 == '<' && c3 == '=') ||
1796 (c == '>' && c2 == '>' && c3 == '='))
82919855 1797 {
aa389a19
FCE
1798 token_str.push_back (c2);
1799 token_str.push_back (c3);
46b5cfb2 1800 input_get (); // c2
46b5cfb2 1801 input_get (); // c3
bb2e3076 1802 }
66c7d4c1
JS
1803 else if ((c == '=' && c2 == '=') ||
1804 (c == '!' && c2 == '=') ||
1805 (c == '<' && c2 == '=') ||
1806 (c == '>' && c2 == '=') ||
93daaca8
SM
1807 (c == '=' && c2 == '~') ||
1808 (c == '!' && c2 == '~') ||
66c7d4c1
JS
1809 (c == '+' && c2 == '=') ||
1810 (c == '-' && c2 == '=') ||
1811 (c == '*' && c2 == '=') ||
1812 (c == '/' && c2 == '=') ||
1813 (c == '%' && c2 == '=') ||
1814 (c == '&' && c2 == '=') ||
1815 (c == '^' && c2 == '=') ||
1816 (c == '|' && c2 == '=') ||
1817 (c == '.' && c2 == '=') ||
1818 (c == '&' && c2 == '&') ||
1819 (c == '|' && c2 == '|') ||
1820 (c == '+' && c2 == '+') ||
1821 (c == '-' && c2 == '-') ||
1822 (c == '-' && c2 == '>') ||
1823 (c == '<' && c2 == '<') ||
1824 (c == '>' && c2 == '>') ||
177a8ead 1825 // preprocessor tokens
66c7d4c1
JS
1826 (c == '%' && c2 == '(') ||
1827 (c == '%' && c2 == '?') ||
1828 (c == '%' && c2 == ':') ||
1829 (c == '%' && c2 == ')'))
bb2e3076 1830 {
aa389a19 1831 token_str.push_back (c2);
bb2e3076 1832 input_get (); // swallow other character
dff50e09 1833 }
2f1a1aea 1834
47d349b1 1835 n->content = token_str;
2f1a1aea
FCE
1836 return n;
1837 }
1838
1839 else
1840 {
1841 n->type = tok_junk;
e3795795
FCE
1842 ostringstream s;
1843 s << "\\x" << hex << setw(2) << setfill('0') << c;
47d349b1 1844 n->content = s.str();
10e7c19d
JS
1845 // signal parser to emit "expected X, found junk" type error
1846 n->make_junk(tok_junk_unknown);
2f1a1aea
FCE
1847 return n;
1848 }
1849}
1850
16fc963f
SM
1851// ------------------------------------------------------------------------
1852
1853void
10e7c19d 1854token::make_junk (token_junk_type junk)
16fc963f
SM
1855{
1856 type = tok_junk;
10e7c19d
JS
1857 junk_type = junk;
1858}
1859
1860// ------------------------------------------------------------------------
1861
1862string
1863token::junk_message(systemtap_session& session) const
1864{
1865 switch (junk_type)
1866 {
1867 case tok_junk_nested_arg:
1868 return _("invalid nested substitution of command line arguments");
1869
1870 case tok_junk_invalid_arg:
1871 return _F("command line argument out of range [1-%lu]",
1872 (unsigned long) session.args.size());
1873
1874 case tok_junk_unclosed_quote:
1875 return _("Could not find matching closing quote");
1876
1877 case tok_junk_unclosed_embedded:
1878 return _("Could not find matching '%}' to close embedded function block");
1879
1880 default:
1881 return _("unknown junk token");
1882 }
16fc963f 1883}
2f1a1aea
FCE
1884
1885// ------------------------------------------------------------------------
1886
1887stapfile*
f8405ea5 1888parser::parse ()
2f1a1aea
FCE
1889{
1890 stapfile* f = new stapfile;
1b1b4ceb 1891 input.set_current_file (f);
56099f08
FCE
1892
1893 bool empty = true;
1894
2f1a1aea
FCE
1895 while (1)
1896 {
1897 try
1898 {
a07a2c28 1899 systemtap_v_seen = 0;
2f1a1aea 1900 const token* t = peek ();
534aad8b 1901 if (! t) // nice clean EOF, modulo any preprocessing that occurred
2f1a1aea
FCE
1902 break;
1903
56099f08 1904 empty = false;
6e213f58
DS
1905 if (t->type == tok_keyword && t->content == "probe")
1906 {
1907 context = con_probe;
1908 parse_probe (f->probes, f->aliases);
1909 }
38bf68a8
MC
1910 else if (t->type == tok_keyword && t->content == "private")
1911 {
1912 context = con_unknown;
1913 parse_private (f->globals, f->probes, f->name, f->functions);
1914 }
6e213f58
DS
1915 else if (t->type == tok_keyword && t->content == "global")
1916 {
1917 context = con_global;
38bf68a8 1918 parse_global (f->globals, f->probes, f->name);
6e213f58
DS
1919 }
1920 else if (t->type == tok_keyword && t->content == "function")
1921 {
1922 context = con_function;
38bf68a8 1923 parse_functiondecl (f->functions, f->name);
6e213f58 1924 }
54dfabe9 1925 else if (t->type == tok_embedded)
6e213f58
DS
1926 {
1927 context = con_embedded;
1928 f->embeds.push_back (parse_embeddedcode ());
1929 }
2f1a1aea 1930 else
6e213f58
DS
1931 {
1932 context = con_unknown;
42eed2a0 1933 throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
6e213f58 1934 }
2f1a1aea
FCE
1935 }
1936 catch (parse_error& pe)
1937 {
7ac01ea0 1938 print_error (pe, errs_as_warnings);
16fc963f
SM
1939
1940 // XXX: do we want tok_junk to be able to force skip_some behaviour?
cd7116b8 1941 if (pe.skip_some) // for recovery
46954f1d
FCE
1942 // Quietly swallow all tokens until the next keyword we can start parsing from.
1943 while (1)
1944 try
1945 {
cd7116b8
FCE
1946 {
1947 const token* t = peek ();
1948 if (! t)
1949 break;
46954f1d 1950 if (t->type == tok_keyword && t->content == "probe") break;
42eed2a0 1951 else if (t->type == tok_keyword && t->content == "private") break;
46954f1d
FCE
1952 else if (t->type == tok_keyword && t->content == "global") break;
1953 else if (t->type == tok_keyword && t->content == "function") break;
1954 else if (t->type == tok_embedded) break;
731a5359 1955 swallow (); // swallow it
cd7116b8 1956 }
46954f1d
FCE
1957 }
1958 catch (parse_error& pe2)
1959 {
1960 // parse error during recovery ... ugh
1961 print_error (pe2);
1962 }
177a8ead 1963 }
2f1a1aea
FCE
1964 }
1965
4bc2b5cd 1966 if (empty && user_file)
56099f08 1967 {
534aad8b
SM
1968 // vary message depending on whether file was *actually* empty:
1969 cerr << (input.saw_tokens
1970 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
4cd32d8c 1971 : _F("Input file '%s' is empty.", input_name.c_str()))
534aad8b 1972 << endl;
56099f08 1973 delete f;
2203b032 1974 f = 0;
56099f08
FCE
1975 }
1976 else if (num_errors > 0)
2f1a1aea 1977 {
52c2652f 1978 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
2f1a1aea 1979 delete f;
2203b032 1980 f = 0;
2f1a1aea 1981 }
dff50e09 1982
2203b032 1983 input.set_current_file(0);
2f1a1aea
FCE
1984 return f;
1985}
1986
1987
101b0805 1988probe*
f8405ea5 1989parser::parse_synthetic_probe (const token* chain)
101b0805
JS
1990{
1991 probe* p = NULL;
1992 stapfile* f = new stapfile;
1993 f->synthetic = true;
1994 input.set_current_file (f);
1995 input.set_current_token_chain (chain);
1996
1997 try
1998 {
1999 context = con_probe;
2000 parse_probe (f->probes, f->aliases);
2001
2002 if (f->probes.size() != 1 || !f->aliases.empty())
2003 throw PARSE_ERROR (_("expected a single synthetic probe"));
2004 p = f->probes[0];
2005 }
2006 catch (parse_error& pe)
2007 {
2008 print_error (pe, errs_as_warnings);
2009 }
2010
2011 // TODO check for unparsed tokens?
2012
2013 input.set_current_file(0);
2014 input.set_current_token_chain(0);
2015 return p;
2016}
2017
2018
20c6c071 2019void
aa389a19
FCE
2020parser::parse_probe (vector<probe *> & probe_ret,
2021 vector<probe_alias *> & alias_ret)
2f1a1aea 2022{
82919855 2023 const token* t0 = next ();
6e213f58 2024 if (! (t0->type == tok_keyword && t0->content == "probe"))
f0454224 2025 throw PARSE_ERROR (_("expected 'probe'"));
82919855 2026
20c6c071
GH
2027 vector<probe_point *> aliases;
2028 vector<probe_point *> locations;
2029
97266278
LG
2030 int epilogue_alias = 0;
2031
2f1a1aea
FCE
2032 while (1)
2033 {
380d759b 2034 vector<probe_point*> pps = parse_probe_points();
dff50e09 2035
b4ceace2 2036 const token* t = peek ();
380d759b 2037 if (pps.size() == 1 && t
b4ceace2
FCE
2038 && t->type == tok_operator && t->content == "=")
2039 {
380d759b
FL
2040 if (pps[0]->optional || pps[0]->sufficient)
2041 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2042 aliases.push_back(pps[0]);
731a5359 2043 swallow ();
b4ceace2
FCE
2044 continue;
2045 }
380d759b 2046 else if (pps.size() == 1 && t
97266278
LG
2047 && t->type == tok_operator && t->content == "+=")
2048 {
380d759b
FL
2049 if (pps[0]->optional || pps[0]->sufficient)
2050 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2051 aliases.push_back(pps[0]);
97266278 2052 epilogue_alias = 1;
731a5359 2053 swallow ();
97266278
LG
2054 continue;
2055 }
b4ceace2
FCE
2056 else if (t && t->type == tok_operator && t->content == "{")
2057 {
380d759b 2058 locations.insert(locations.end(), pps.begin(), pps.end());
b4ceace2
FCE
2059 break;
2060 }
2f1a1aea 2061 else
f0454224 2062 throw PARSE_ERROR (_("expected probe point specifier"));
2f1a1aea 2063 }
20c6c071 2064
20c6c071
GH
2065 if (aliases.empty())
2066 {
54dfabe9
FCE
2067 probe* p = new probe;
2068 p->tok = t0;
2069 p->locations = locations;
2070 p->body = parse_stmt_block ();
37ebca01 2071 p->privileged = privileged;
a07a2c28 2072 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 2073 probe_ret.push_back (p);
20c6c071
GH
2074 }
2075 else
2076 {
54dfabe9 2077 probe_alias* p = new probe_alias (aliases);
97266278
LG
2078 if(epilogue_alias)
2079 p->epilogue_style = true;
2080 else
2081 p->epilogue_style = false;
54dfabe9
FCE
2082 p->tok = t0;
2083 p->locations = locations;
2084 p->body = parse_stmt_block ();
37ebca01 2085 p->privileged = privileged;
a07a2c28 2086 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 2087 alias_ret.push_back (p);
20c6c071 2088 }
54dfabe9 2089}
20c6c071 2090
54dfabe9
FCE
2091
2092embeddedcode*
2093parser::parse_embeddedcode ()
2094{
2095 embeddedcode* e = new embeddedcode;
2096 const token* t = next ();
2097 if (t->type != tok_embedded)
f0454224 2098 throw PARSE_ERROR (_("expected '%{'"));
24cb178f
FCE
2099
2100 if (! privileged)
f0454224 2101 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
cd7116b8 2102 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
2103
2104 e->tok = t;
47d349b1 2105 e->code = t->content;
54dfabe9 2106 return e;
2f1a1aea
FCE
2107}
2108
2109
2110block*
56099f08 2111parser::parse_stmt_block ()
2f1a1aea
FCE
2112{
2113 block* pb = new block;
2114
56099f08
FCE
2115 const token* t = next ();
2116 if (! (t->type == tok_operator && t->content == "{"))
f0454224 2117 throw PARSE_ERROR (_("expected '{'"));
56099f08
FCE
2118
2119 pb->tok = t;
2b066ec1 2120
2f1a1aea
FCE
2121 while (1)
2122 {
46954f1d
FCE
2123 t = peek ();
2124 if (t && t->type == tok_operator && t->content == "}")
2125 {
731a5359 2126 swallow ();
46954f1d
FCE
2127 break;
2128 }
2129 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
2130 }
2131
2132 return pb;
2133}
2134
2135
f4fe2e93
FCE
2136try_block*
2137parser::parse_try_block ()
2138{
2139 try_block* pb = new try_block;
2140
731a5359 2141 pb->tok = expect_kw_token ("try");
f4fe2e93
FCE
2142 pb->try_block = parse_stmt_block();
2143 expect_kw ("catch");
2144
2145 const token* t = peek ();
3819d181 2146 if (t != NULL && t->type == tok_operator && t->content == "(")
f4fe2e93 2147 {
731a5359 2148 swallow (); // swallow the '('
f4fe2e93
FCE
2149
2150 t = next();
2151 if (! (t->type == tok_identifier))
f0454224 2152 throw PARSE_ERROR (_("expected identifier"));
f4fe2e93
FCE
2153 symbol* sym = new symbol;
2154 sym->tok = t;
a3e980f9 2155 sym->name = t->content;
f4fe2e93
FCE
2156 pb->catch_error_var = sym;
2157
2158 expect_op (")");
2159 }
2160 else
2161 pb->catch_error_var = 0;
2162
2163 pb->catch_block = parse_stmt_block();
2164
2165 return pb;
2166}
2167
2168
2169
2f1a1aea
FCE
2170statement*
2171parser::parse_statement ()
2172{
40b71c47 2173 statement *ret;
2f1a1aea
FCE
2174 const token* t = peek ();
2175 if (t && t->type == tok_operator && t->content == ";")
f946b10f 2176 return new null_statement (next ());
dff50e09 2177 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 2178 return parse_stmt_block (); // Don't squash semicolons.
f4fe2e93
FCE
2179 else if (t && t->type == tok_keyword && t->content == "try")
2180 return parse_try_block (); // Don't squash semicolons.
6e213f58 2181 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 2182 return parse_if_statement (); // Don't squash semicolons.
6e213f58 2183 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 2184 return parse_for_loop (); // Don't squash semicolons.
6e213f58 2185 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
2186 return parse_foreach_loop (); // Don't squash semicolons.
2187 else if (t && t->type == tok_keyword && t->content == "while")
2188 return parse_while_loop (); // Don't squash semicolons.
6e213f58 2189 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 2190 ret = parse_return_statement ();
6e213f58 2191 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 2192 ret = parse_delete_statement ();
6e213f58 2193 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 2194 ret = parse_break_statement ();
6e213f58 2195 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 2196 ret = parse_continue_statement ();
6e213f58 2197 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 2198 ret = parse_next_statement ();
2f1a1aea
FCE
2199 else if (t && (t->type == tok_operator || // expressions are flexible
2200 t->type == tok_identifier ||
2201 t->type == tok_number ||
7d902887
FCE
2202 t->type == tok_string ||
2203 t->type == tok_embedded ))
40b71c47 2204 ret = parse_expr_statement ();
54dfabe9 2205 // XXX: consider generally accepting tok_embedded here too
2f1a1aea 2206 else
f0454224 2207 throw PARSE_ERROR (_("expected statement"));
40b71c47
MW
2208
2209 // Squash "empty" trailing colons after any "non-block-like" statement.
2210 t = peek ();
2211 if (t && t->type == tok_operator && t->content == ";")
2212 {
731a5359 2213 swallow (); // Silently eat trailing ; after statement
40b71c47
MW
2214 }
2215
2216 return ret;
2f1a1aea
FCE
2217}
2218
38bf68a8 2219void
f41e297c
JS
2220parser::parse_private (vector <vardecl*>& globals, vector<probe*>& probes,
2221 string const & fname, vector<functiondecl*>& functions)
38bf68a8
MC
2222{
2223 const token* t = next ();
127e4e36 2224 if (! (t->type == tok_keyword && t->content == "private"))
38bf68a8
MC
2225 throw PARSE_ERROR (_("expected 'private'"));
2226 swallow ();
2227 t = next ();
2228 if (t->type == tok_keyword && t->content == "function")
2229 {
2230 swallow ();
2231 context = con_function;
2232 do_parse_functiondecl(functions, t, fname, true);
2233 }
2234 else if (t->type == tok_keyword && t->content == "global")
2235 {
2236 swallow ();
2237 context = con_global;
2238 t = next ();
127e4e36 2239 if (! (t->type == tok_identifier))
38bf68a8
MC
2240 throw PARSE_ERROR (_("expected identifier"));
2241 do_parse_global(globals, probes, fname, t, true);
2242 }
2243 // The `private <identifier>` is an acceptable shorthand
2244 // for `private global <identifier>` per above.
2245 else if (t->type == tok_identifier)
2246 {
2247 context = con_global;
2248 do_parse_global(globals, probes, fname, t, true);
2249 }
2250 else
2251 throw PARSE_ERROR (_("expected 'function' or identifier"));
2252}
2f1a1aea 2253
56099f08 2254void
f41e297c
JS
2255parser::parse_global (vector <vardecl*>& globals, vector<probe*>& probes,
2256 string const & fname)
2f1a1aea 2257{
82919855 2258 const token* t0 = next ();
6e213f58 2259 if (! (t0->type == tok_keyword && t0->content == "global"))
38bf68a8 2260 throw PARSE_ERROR (_("expected 'global' or 'private'"));
731a5359 2261 swallow ();
38bf68a8
MC
2262 do_parse_global(globals, probes, fname, 0, false);
2263}
82919855 2264
38bf68a8 2265void
f41e297c
JS
2266parser::do_parse_global (vector <vardecl*>& globals, vector<probe*>&,
2267 string const & fname, const token* t0, bool priv)
38bf68a8
MC
2268{
2269 bool iter0 = true;
2270 const token* t;
56099f08
FCE
2271 while (1)
2272 {
42eed2a0
MC
2273 t = (iter0 && priv) ? t0 : next ();
2274 iter0 = false;
56099f08 2275 if (! (t->type == tok_identifier))
f0454224 2276 throw PARSE_ERROR (_("expected identifier"));
56099f08 2277
c8fbf931
MC
2278 string gname = "__global_" + string(t->content);
2279 string pname = "__private_" + detox_path(fname) + string(t->content);
2280 string name = priv ? pname : gname;
2281
2b066ec1 2282 for (unsigned i=0; i<globals.size(); i++)
c8fbf931
MC
2283 {
2284 if (globals[i]->name == name)
f0454224 2285 throw PARSE_ERROR (_("duplicate global name"));
c8fbf931
MC
2286 if ((globals[i]->name == gname) || (globals[i]->name == pname))
2287 throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2288 }
38bf68a8 2289
24cb178f 2290 vardecl* d = new vardecl;
9fef07ff 2291 d->unmangled_name = t->content;
38bf68a8 2292 d->name = name;
24cb178f 2293 d->tok = t;
a07a2c28 2294 d->systemtap_v_conditional = systemtap_v_seen;
24cb178f 2295 globals.push_back (d);
56099f08 2296
82919855 2297 t = peek ();
ef474d24 2298
74e6cc92
CM
2299 if(t && t->type == tok_operator && t->content == "%") //wrapping
2300 {
2301 d->wrap = true;
731a5359 2302 swallow ();
74e6cc92
CM
2303 t = peek();
2304 }
2305
ef474d24
JS
2306 if (t && t->type == tok_operator && t->content == "[") // array size
2307 {
2308 int64_t size;
731a5359 2309 swallow ();
ef474d24
JS
2310 expect_number(size);
2311 if (size <= 0 || size > 1000000) // arbitrary max
f0454224 2312 throw PARSE_ERROR(_("array size out of range"));
ef474d24
JS
2313 d->maxsize = (int)size;
2314 expect_known(tok_operator, "]");
2315 t = peek ();
2316 }
2317
4b5f3e45 2318 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
2319 {
2320 if (!d->compatible_arity(0))
f0454224 2321 throw PARSE_ERROR(_("only scalar globals can be initialized"));
58701b78 2322 d->set_arity(0, t);
731a5359 2323 next (); // Don't swallow, set_arity() used the peeked token.
ef474d24
JS
2324 d->init = parse_literal ();
2325 d->type = d->init->type;
2326 t = peek ();
2327 }
4b5f3e45 2328
c3799d72 2329 if (t && t->type == tok_operator && t->content == ";") // termination
950da622 2330 {
731a5359 2331 swallow ();
950da622
MW
2332 break;
2333 }
c3799d72 2334
4b5f3e45 2335 if (t && t->type == tok_operator && t->content == ",") // next global
82919855 2336 {
731a5359 2337 swallow ();
82919855
FCE
2338 continue;
2339 }
56099f08 2340 else
82919855 2341 break;
56099f08
FCE
2342 }
2343}
2344
24cb178f 2345void
f41e297c
JS
2346parser::parse_functiondecl (vector<functiondecl*>& functions,
2347 string const & fname)
56099f08 2348{
82919855 2349 const token* t = next ();
6e213f58 2350 if (! (t->type == tok_keyword && t->content == "function"))
f0454224 2351 throw PARSE_ERROR (_("expected 'function'"));
731a5359 2352 swallow ();
38bf68a8
MC
2353 do_parse_functiondecl(functions, t, fname, false);
2354}
56099f08 2355
38bf68a8 2356void
f41e297c
JS
2357parser::do_parse_functiondecl (vector<functiondecl*>& functions, const token* t,
2358 string const & fname, bool priv)
38bf68a8 2359{
82919855 2360 t = next ();
6e213f58
DS
2361 if (! (t->type == tok_identifier)
2362 && ! (t->type == tok_keyword
2363 && (t->content == "string" || t->content == "long")))
f0454224 2364 throw PARSE_ERROR (_("expected identifier"));
24cb178f 2365
c8fbf931
MC
2366 string gname = "__global_" + string(t->content);
2367 string pname = "__private_" + detox_path(fname) + string(t->content);
2368 string name = priv ? pname : gname;
7b5b30a8 2369 name += "__overload_" + lex_cast(session.overload_count[t->content]++);
38bf68a8 2370
24cb178f 2371 functiondecl *fd = new functiondecl ();
9fef07ff 2372 fd->unmangled_name = t->content;
38bf68a8 2373 fd->name = name;
56099f08
FCE
2374 fd->tok = t;
2375
2376 t = next ();
6a505121
FCE
2377 if (t->type == tok_operator && t->content == ":")
2378 {
731a5359 2379 swallow ();
6a505121 2380 t = next ();
6e213f58 2381 if (t->type == tok_keyword && t->content == "string")
6a505121 2382 fd->type = pe_string;
6e213f58 2383 else if (t->type == tok_keyword && t->content == "long")
6a505121 2384 fd->type = pe_long;
f0454224 2385 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
731a5359 2386 swallow ();
6a505121
FCE
2387
2388 t = next ();
2389 }
2390
56099f08 2391 if (! (t->type == tok_operator && t->content == "("))
f0454224 2392 throw PARSE_ERROR (_("expected '('"));
731a5359 2393 swallow ();
56099f08
FCE
2394
2395 while (1)
2396 {
2397 t = next ();
2398
100a540e 2399 // permit zero-argument functions
56099f08 2400 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2401 {
2402 swallow ();
2403 break;
2404 }
56099f08 2405 else if (! (t->type == tok_identifier))
f0454224 2406 throw PARSE_ERROR (_("expected identifier"));
56099f08 2407 vardecl* vd = new vardecl;
9fef07ff 2408 vd->unmangled_name = vd->name = t->content;
59093206
CS
2409
2410 for (auto it = fd->formal_args.begin() ; it != fd->formal_args.end(); ++it)
2411 {
2412 string param = vd->unmangled_name;
2413 if ((*it)->unmangled_name == param)
2414 throw PARSE_ERROR(_("duplicate parameter names"));
2415 }
2416
56099f08
FCE
2417 vd->tok = t;
2418 fd->formal_args.push_back (vd);
a07a2c28 2419 fd->systemtap_v_conditional = systemtap_v_seen;
56099f08
FCE
2420
2421 t = next ();
59093206 2422
6a505121
FCE
2423 if (t->type == tok_operator && t->content == ":")
2424 {
731a5359 2425 swallow ();
6a505121 2426 t = next ();
6e213f58 2427 if (t->type == tok_keyword && t->content == "string")
6a505121 2428 vd->type = pe_string;
6e213f58 2429 else if (t->type == tok_keyword && t->content == "long")
6a505121 2430 vd->type = pe_long;
f0454224 2431 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
731a5359 2432 swallow ();
6a505121
FCE
2433 t = next ();
2434 }
56099f08 2435 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2436 {
2437 swallow ();
2438 break;
59093206 2439 }
56099f08 2440 if (t->type == tok_operator && t->content == ",")
731a5359
MW
2441 {
2442 swallow ();
2443 continue;
2444 }
56099f08 2445 else
f0454224 2446 throw PARSE_ERROR (_("expected ',' or ')'"));
56099f08
FCE
2447 }
2448
7b5b30a8
FL
2449 t = peek();
2450 if (t->type == tok_operator && t->content == ":")
2451 {
7b5b30a8 2452 swallow();
c0d0d623
FL
2453 literal* literal = parse_literal();
2454 fd->priority = dynamic_cast<literal_number*>(literal)->value;
7b5b30a8 2455 // reserve priority 0 for user script implementation
c0d0d623 2456 if (fd->priority < 1)
7b5b30a8 2457 throw PARSE_ERROR (_("specified priority must be > 0"));
c0d0d623 2458 delete literal;
7b5b30a8
FL
2459 }
2460 else if (user_file)
2461 {
2462 // allow script file implementation override automatically when
2463 // priority not specified
2464 fd->priority = 0;
2465 }
2466
54dfabe9
FCE
2467 t = peek ();
2468 if (t && t->type == tok_embedded)
2469 fd->body = parse_embeddedcode ();
2470 else
2471 fd->body = parse_stmt_block ();
24cb178f
FCE
2472
2473 functions.push_back (fd);
2f1a1aea
FCE
2474}
2475
380d759b
FL
2476vector<probe_point*>
2477parser::parse_probe_points()
2f1a1aea 2478{
380d759b
FL
2479 vector<probe_point*> pps;
2480 while (1)
2481 {
2482 vector<probe_point*> tail = parse_components();
2483 pps.insert(pps.end(), tail.begin(), tail.end());
2484
2485 const token* t = peek();
2486 if (t && t->type == tok_operator && t->content == ",")
2487 {
2488 swallow();
2489 continue;
2490 }
2491
2492 if (t && t->type == tok_operator
2493 && (t->content == "{" || t->content == "=" ||
2494 t->content == "+="|| t->content == "}"))
2495 break;
2f1a1aea 2496
380d759b
FL
2497 throw PARSE_ERROR (_("expected one of ', { } = +='"));
2498 }
2499 return pps;
2500}
2501
2502vector<probe_point*>
2503parser::parse_components()
2504{
2505 vector<probe_point*> pps;
9c0c0e46 2506 while (1)
2f1a1aea 2507 {
380d759b 2508 vector<probe_point*> suffix = parse_component();
9c0c0e46 2509
380d759b
FL
2510 // Cartesian product of components
2511 if (pps.empty())
2512 pps = suffix;
2513 else
2514 {
2515 assert(!suffix.empty());
2516 vector<probe_point*> product;
2517 for (unsigned i = 0; i < pps.size(); i++)
2518 {
2519 if (pps[i]->optional || pps[i]->sufficient || pps[i]->condition)
2520 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2521 pps[i]->components[0]->tok);
2522 for (unsigned j = 0; j < suffix.size(); j++)
2523 {
2524 probe_point* pp = new probe_point;
2525 pp->components.insert(pp->components.end(),
2526 pps[i]->components.begin(), pps[i]->components.end());
2527 pp->components.insert(pp->components.end(),
2528 suffix[j]->components.begin(), suffix[j]->components.end());
2529 pp->optional = suffix[j]->optional;
2530 pp->sufficient = suffix[j]->sufficient;
2531 pp->condition = suffix[j]->condition;
2532 product.push_back(pp);
2533 }
2534 }
2535 for (unsigned i = 0; i < pps.size(); i++) delete pps[i];
2536 for (unsigned i = 0; i < suffix.size(); i++) delete suffix[i];
2537 pps = product;
2538 }
2539
2540 const token* t = peek();
2541 if (t && t->type == tok_operator && t->content == ".")
2542 {
2543 swallow ();
2544 continue;
2545 }
2546
2547 // We only fall through here at the end of a probe point (past
2548 // all the dotted/parametrized components).
2549
2550 if (t && t->type == tok_operator &&
2551 (t->content == "?" || t->content == "!"))
2552 {
2553 for (unsigned i = 0; i < pps.size(); i++)
2554 {
2555 if (pps[i]->optional || pps[i]->sufficient)
2556 throw PARSE_ERROR (_("'?' or '!' respecified"));
2557 pps[i]->optional = true;
2558 if (t->content == "!") pps[i]->sufficient = true;
2559 }
2560 // NB: sufficient implies optional
2561 swallow ();
2562 t = peek ();
2563 // fall through
2564 }
2565
2566 if (t && t->type == tok_keyword && t->content == "if")
2567 {
2568 swallow ();
2569 t = peek ();
2570 if (!(t && t->type == tok_operator && t->content == "("))
2571 throw PARSE_ERROR (_("expected '('"));
2572 swallow ();
2573
2574 expression* e = parse_expression();
2575 for (unsigned i = 0; i < pps.size(); i++)
2576 {
2577 if (pps[i]->condition != 0)
2578 throw PARSE_ERROR (_("condition respecified"));
2579 pps[i]->condition = e;
2580 }
2581
2582 t = peek ();
2583 if (!(t && t->type == tok_operator && t->content == ")"))
2584 throw PARSE_ERROR (_("expected ')'"));
2585 swallow ();
2586 }
2587
2588 break;
2589 }
2590 return pps;
2591}
2592
2593vector<probe_point*>
2594parser::parse_component()
2595{
2596 const token* t = next ();
2597 if (! (t->type == tok_identifier
2598 // we must allow ".return" and ".function", which are keywords
2599 || t->type == tok_keyword
2600 // we must allow "*", due to being an operator
2601 || (t->type == tok_operator && (t->content == "*" || t->content == "{"))))
2602 throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
2603
2604 if (t && t->type == tok_operator && t->content == "{")
2605 {
2606 swallow();
2607 vector<probe_point*> pps = parse_probe_points();
2608 t = peek();
2609 if (!(t && t->type == tok_operator && t->content == "}"))
2610 throw PARSE_ERROR (_("expected '}'"));
2611 swallow();
2612 return pps;
2613 }
2614 else
2615 {
b5477cd9 2616 // loop which reconstitutes an identifier with wildcards
47d349b1 2617 string content = t->content;
eadd685c 2618 bool changed_p = false;
b5477cd9
SM
2619 while (1)
2620 {
2621 const token* u = peek();
3819d181
MW
2622 if (u == NULL)
2623 break;
b5477cd9
SM
2624 // ensure pieces of the identifier are adjacent:
2625 if (input.ate_whitespace)
2626 break;
2627 // ensure pieces of the identifier are valid:
2628 if (! (u->type == tok_identifier
2629 // we must allow arbitrary keywords with a wildcard
2630 || u->type == tok_keyword
2631 // we must allow "*", due to being an operator
2632 || (u->type == tok_operator && u->content == "*")))
2633 break;
2634
2635 // append u to t
47d349b1 2636 content = content + (string)u->content;
eadd685c 2637 changed_p = true;
380d759b 2638
b5477cd9 2639 // consume u
731a5359 2640 swallow ();
b5477cd9 2641 }
eadd685c
FCE
2642
2643 if (changed_p)
2644 {
2645 // We've already swallowed the first token and we're not
2646 // putting it back; no one else has a copy; so we can
2647 // safely overwrite its content and reuse it.
2648 const_cast<token*>(t)->content = content;
2649 }
9c0c0e46
FCE
2650
2651 probe_point::component* c = new probe_point::component;
a3e980f9 2652 c->functor = t->content;
f1a0157a 2653 c->tok = t;
380d759b
FL
2654 vector<probe_point*> pps;
2655 probe_point* pp = new probe_point;
2656 pp->components.push_back(c);
2657 pps.push_back(pp);
6e3347a9 2658 // NB we may add c->arg soon
9c0c0e46
FCE
2659
2660 t = peek ();
a477f3f1 2661
6e3347a9 2662 // consume optional parameter
9c0c0e46
FCE
2663 if (t && t->type == tok_operator && t->content == "(")
2664 {
731a5359 2665 swallow (); // consume "("
9c0c0e46
FCE
2666 c->arg = parse_literal ();
2667
2668 t = next ();
2669 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2670 throw PARSE_ERROR (_("expected ')'"));
731a5359 2671 swallow ();
6e3347a9 2672 }
380d759b 2673 return pps;
2f1a1aea 2674 }
2f1a1aea
FCE
2675}
2676
d24f1ff4
SM
2677literal_string*
2678parser::consume_string_literals(const token *t)
2679{
0ad0aae2 2680 literal_string *ls = new literal_string (t->content);
d24f1ff4
SM
2681
2682 // PR11208: check if the next token is also a string literal;
2683 // auto-concatenate it. This is complicated to the extent that we
2684 // need to skip intermediate whitespace.
2685 //
2686 // NB for versions prior to 2.0: but don't skip over intervening comments
0ad0aae2
JS
2687 string concat;
2688 bool p_concat = false;
d24f1ff4
SM
2689 const token *n = peek();
2690 while (n != NULL && n->type == tok_string
f8405ea5 2691 && ! (!input.has_version("2.0") && input.ate_comment))
d24f1ff4 2692 {
0ad0aae2
JS
2693 if (!p_concat)
2694 {
2695 concat = t->content;
2696 p_concat = true;
2697 }
2698 concat.append(n->content.data(), n->content.size());
2699 next(); // consume the token
d24f1ff4
SM
2700 n = peek();
2701 }
0ad0aae2
JS
2702 if (p_concat)
2703 ls->value = concat;
d24f1ff4
SM
2704 return ls;
2705}
2706
2707
2708// Parse a string literal and perform backslash escaping on the contents:
2709literal_string*
2710parser::parse_literal_string ()
2711{
2712 const token* t = next ();
2713 literal_string* l;
2714 if (t->type == tok_string)
2715 l = consume_string_literals (t);
2716 else
f0454224 2717 throw PARSE_ERROR (_("expected literal string"));
d24f1ff4
SM
2718
2719 l->tok = t;
2720 return l;
2721}
2722
2723
2f1a1aea
FCE
2724literal*
2725parser::parse_literal ()
2726{
2727 const token* t = next ();
56099f08 2728 literal* l;
2f1a1aea 2729 if (t->type == tok_string)
c5be7511 2730 {
d24f1ff4 2731 l = consume_string_literals (t);
c5be7511 2732 }
16e8f21f 2733 else
9c0c0e46 2734 {
16e8f21f
JS
2735 bool neg = false;
2736 if (t->type == tok_operator && t->content == "-")
2737 {
2738 neg = true;
731a5359 2739 swallow ();
16e8f21f
JS
2740 t = next ();
2741 }
2742
2743 if (t->type == tok_number)
2744 {
7371cd19
JS
2745 const string& s = t->content;
2746 const char* startp = s.c_str ();
16e8f21f
JS
2747 char* endp = (char*) startp;
2748
2749 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2750 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
2751 // since the lexer only gives us positive digit strings, but we'll
2752 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
2753 errno = 0;
2754 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 2755 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 2756 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
2757 || (unsigned long long) value > 18446744073709551615ULL
2758 || value < -9223372036854775807LL-1)
f0454224 2759 throw PARSE_ERROR (_("number invalid or out of range"));
16e8f21f 2760
79e6d33f
JS
2761 if (neg)
2762 value = -value;
2763
16e8f21f
JS
2764 l = new literal_number (value);
2765 }
2766 else
f0454224 2767 throw PARSE_ERROR (_("expected literal string or number"));
9c0c0e46 2768 }
56099f08
FCE
2769
2770 l->tok = t;
2771 return l;
2f1a1aea
FCE
2772}
2773
2774
2775if_statement*
2776parser::parse_if_statement ()
2777{
2778 const token* t = next ();
6e213f58 2779 if (! (t->type == tok_keyword && t->content == "if"))
f0454224 2780 throw PARSE_ERROR (_("expected 'if'"));
56099f08
FCE
2781 if_statement* s = new if_statement;
2782 s->tok = t;
2783
2784 t = next ();
2f1a1aea 2785 if (! (t->type == tok_operator && t->content == "("))
f0454224 2786 throw PARSE_ERROR (_("expected '('"));
731a5359 2787 swallow ();
2f1a1aea 2788
2f1a1aea
FCE
2789 s->condition = parse_expression ();
2790
2791 t = next ();
2792 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2793 throw PARSE_ERROR (_("expected ')'"));
731a5359 2794 swallow ();
2f1a1aea
FCE
2795
2796 s->thenblock = parse_statement ();
2797
2798 t = peek ();
6e213f58 2799 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea 2800 {
731a5359 2801 swallow ();
2f1a1aea
FCE
2802 s->elseblock = parse_statement ();
2803 }
ed10c639
FCE
2804 else
2805 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
2806
2807 return s;
2808}
2809
2810
69c68955
FCE
2811expr_statement*
2812parser::parse_expr_statement ()
2813{
2814 expr_statement *es = new expr_statement;
2815 const token* t = peek ();
5e58d11c 2816 if (t == NULL)
f0454224 2817 throw PARSE_ERROR (_("expression statement expected"));
731a5359
MW
2818 // Copy, we only peeked, parse_expression might swallow.
2819 es->tok = new token (*t);
69c68955
FCE
2820 es->value = parse_expression ();
2821 return es;
2822}
2823
2824
56099f08
FCE
2825return_statement*
2826parser::parse_return_statement ()
2827{
2828 const token* t = next ();
6e213f58 2829 if (! (t->type == tok_keyword && t->content == "return"))
f0454224 2830 throw PARSE_ERROR (_("expected 'return'"));
6e213f58 2831 if (context != con_function)
f0454224 2832 throw PARSE_ERROR (_("found 'return' not in function context"));
56099f08
FCE
2833 return_statement* s = new return_statement;
2834 s->tok = t;
2835 s->value = parse_expression ();
2836 return s;
2837}
2838
2839
2840delete_statement*
2841parser::parse_delete_statement ()
2842{
2843 const token* t = next ();
6e213f58 2844 if (! (t->type == tok_keyword && t->content == "delete"))
f0454224 2845 throw PARSE_ERROR (_("expected 'delete'"));
56099f08
FCE
2846 delete_statement* s = new delete_statement;
2847 s->tok = t;
2848 s->value = parse_expression ();
2849 return s;
2850}
2851
2852
f3c26ea5
FCE
2853next_statement*
2854parser::parse_next_statement ()
2855{
2856 const token* t = next ();
6e213f58 2857 if (! (t->type == tok_keyword && t->content == "next"))
f0454224 2858 throw PARSE_ERROR (_("expected 'next'"));
f3c26ea5
FCE
2859 next_statement* s = new next_statement;
2860 s->tok = t;
2861 return s;
2862}
2863
2864
2865break_statement*
2866parser::parse_break_statement ()
2867{
2868 const token* t = next ();
6e213f58 2869 if (! (t->type == tok_keyword && t->content == "break"))
f0454224 2870 throw PARSE_ERROR (_("expected 'break'"));
f3c26ea5
FCE
2871 break_statement* s = new break_statement;
2872 s->tok = t;
2873 return s;
2874}
2875
2876
2877continue_statement*
2878parser::parse_continue_statement ()
2879{
2880 const token* t = next ();
6e213f58 2881 if (! (t->type == tok_keyword && t->content == "continue"))
f0454224 2882 throw PARSE_ERROR (_("expected 'continue'"));
f3c26ea5
FCE
2883 continue_statement* s = new continue_statement;
2884 s->tok = t;
2885 return s;
2886}
2887
2888
69c68955
FCE
2889for_loop*
2890parser::parse_for_loop ()
2891{
f3c26ea5 2892 const token* t = next ();
6e213f58 2893 if (! (t->type == tok_keyword && t->content == "for"))
f0454224 2894 throw PARSE_ERROR (_("expected 'for'"));
f3c26ea5
FCE
2895 for_loop* s = new for_loop;
2896 s->tok = t;
2897
2898 t = next ();
2899 if (! (t->type == tok_operator && t->content == "("))
f0454224 2900 throw PARSE_ERROR (_("expected '('"));
731a5359 2901 swallow ();
f3c26ea5
FCE
2902
2903 // initializer + ";"
2904 t = peek ();
2905 if (t && t->type == tok_operator && t->content == ";")
2906 {
cbfbbf69 2907 s->init = 0;
731a5359 2908 swallow ();
f3c26ea5
FCE
2909 }
2910 else
2911 {
2912 s->init = parse_expr_statement ();
2913 t = next ();
2914 if (! (t->type == tok_operator && t->content == ";"))
f0454224 2915 throw PARSE_ERROR (_("expected ';'"));
731a5359 2916 swallow ();
f3c26ea5
FCE
2917 }
2918
2919 // condition + ";"
2920 t = peek ();
2921 if (t && t->type == tok_operator && t->content == ";")
2922 {
2923 literal_number* l = new literal_number(1);
2924 s->cond = l;
2925 s->cond->tok = next ();
2926 }
2927 else
2928 {
2929 s->cond = parse_expression ();
2930 t = next ();
2931 if (! (t->type == tok_operator && t->content == ";"))
f0454224 2932 throw PARSE_ERROR (_("expected ';'"));
731a5359 2933 swallow ();
f3c26ea5 2934 }
dff50e09 2935
f3c26ea5
FCE
2936 // increment + ")"
2937 t = peek ();
2938 if (t && t->type == tok_operator && t->content == ")")
2939 {
cbfbbf69 2940 s->incr = 0;
731a5359 2941 swallow ();
f3c26ea5
FCE
2942 }
2943 else
2944 {
2945 s->incr = parse_expr_statement ();
2946 t = next ();
2947 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2948 throw PARSE_ERROR (_("expected ')'"));
731a5359 2949 swallow ();
f3c26ea5
FCE
2950 }
2951
2952 // block
2953 s->block = parse_statement ();
2954
2955 return s;
2956}
2957
2958
2959for_loop*
2960parser::parse_while_loop ()
2961{
2962 const token* t = next ();
6e213f58 2963 if (! (t->type == tok_keyword && t->content == "while"))
f0454224 2964 throw PARSE_ERROR (_("expected 'while'"));
f3c26ea5
FCE
2965 for_loop* s = new for_loop;
2966 s->tok = t;
2967
2968 t = next ();
2969 if (! (t->type == tok_operator && t->content == "("))
f0454224 2970 throw PARSE_ERROR (_("expected '('"));
731a5359 2971 swallow ();
f3c26ea5
FCE
2972
2973 // dummy init and incr fields
cbfbbf69
FCE
2974 s->init = 0;
2975 s->incr = 0;
f3c26ea5
FCE
2976
2977 // condition
2978 s->cond = parse_expression ();
2979
f3c26ea5
FCE
2980 t = next ();
2981 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2982 throw PARSE_ERROR (_("expected ')'"));
731a5359 2983 swallow ();
dff50e09 2984
f3c26ea5
FCE
2985 // block
2986 s->block = parse_statement ();
2987
2988 return s;
69c68955
FCE
2989}
2990
2991
2992foreach_loop*
2993parser::parse_foreach_loop ()
2994{
2995 const token* t = next ();
6e213f58 2996 if (! (t->type == tok_keyword && t->content == "foreach"))
f0454224 2997 throw PARSE_ERROR (_("expected 'foreach'"));
69c68955
FCE
2998 foreach_loop* s = new foreach_loop;
2999 s->tok = t;
93484556 3000 s->sort_direction = 0;
fd5689dc 3001 s->sort_aggr = sc_none;
c261711d 3002 s->value = NULL;
27f21e8c 3003 s->limit = NULL;
69c68955
FCE
3004
3005 t = next ();
3006 if (! (t->type == tok_operator && t->content == "("))
f0454224 3007 throw PARSE_ERROR (_("expected '('"));
731a5359 3008 swallow ();
69c68955 3009
c261711d
JS
3010 symbol* lookahead_sym = NULL;
3011 int lookahead_sort = 0;
3012
3013 t = peek ();
3014 if (t && t->type == tok_identifier)
3015 {
3016 next ();
3017 lookahead_sym = new symbol;
3018 lookahead_sym->tok = t;
a3e980f9 3019 lookahead_sym->name = t->content;
c261711d
JS
3020
3021 t = peek ();
3022 if (t && t->type == tok_operator &&
3023 (t->content == "+" || t->content == "-"))
3024 {
c261711d 3025 lookahead_sort = (t->content == "+") ? 1 : -1;
731a5359 3026 swallow ();
c261711d
JS
3027 }
3028
3029 t = peek ();
3030 if (t && t->type == tok_operator && t->content == "=")
3031 {
731a5359 3032 swallow ();
c261711d
JS
3033 s->value = lookahead_sym;
3034 if (lookahead_sort)
3035 {
3036 s->sort_direction = lookahead_sort;
3037 s->sort_column = 0;
3038 }
3039 lookahead_sym = NULL;
3040 }
3041 }
3042
69c68955
FCE
3043 // see also parse_array_in
3044
3045 bool parenthesized = false;
3046 t = peek ();
c261711d 3047 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
69c68955 3048 {
731a5359 3049 swallow ();
69c68955
FCE
3050 parenthesized = true;
3051 }
3052
c261711d
JS
3053 if (lookahead_sym)
3054 {
3055 s->indexes.push_back (lookahead_sym);
3056 if (lookahead_sort)
3057 {
3058 s->sort_direction = lookahead_sort;
3059 s->sort_column = 1;
3060 }
3061 lookahead_sym = NULL;
3062 }
3063 else while (1)
69c68955
FCE
3064 {
3065 t = next ();
3066 if (! (t->type == tok_identifier))
f0454224 3067 throw PARSE_ERROR (_("expected identifier"));
69c68955
FCE
3068 symbol* sym = new symbol;
3069 sym->tok = t;
a3e980f9 3070 sym->name = t->content;
69c68955
FCE
3071 s->indexes.push_back (sym);
3072
93484556
FCE
3073 t = peek ();
3074 if (t && t->type == tok_operator &&
3075 (t->content == "+" || t->content == "-"))
3076 {
3077 if (s->sort_direction)
f0454224 3078 throw PARSE_ERROR (_("multiple sort directives"));
93484556
FCE
3079 s->sort_direction = (t->content == "+") ? 1 : -1;
3080 s->sort_column = s->indexes.size();
731a5359 3081 swallow ();
93484556
FCE
3082 }
3083
69c68955
FCE
3084 if (parenthesized)
3085 {
93484556 3086 t = peek ();
69c68955
FCE
3087 if (t && t->type == tok_operator && t->content == ",")
3088 {
731a5359 3089 swallow ();
69c68955
FCE
3090 continue;
3091 }
3092 else if (t && t->type == tok_operator && t->content == "]")
3093 {
731a5359 3094 swallow ();
69c68955
FCE
3095 break;
3096 }
dff50e09 3097 else
f0454224 3098 throw PARSE_ERROR (_("expected ',' or ']'"));
69c68955
FCE
3099 }
3100 else
3101 break; // expecting only one expression
3102 }
3103
3104 t = next ();
6e213f58 3105 if (! (t->type == tok_keyword && t->content == "in"))
f0454224 3106 throw PARSE_ERROR (_("expected 'in'"));
731a5359 3107 swallow ();
dff50e09 3108
d02548c0 3109 s->base = parse_indexable();
69c68955 3110
3040bf3a
AJ
3111 // check if there was an array slice that was specified
3112 t = peek();
3113 if (t && t->type == tok_operator && t->content == "[")
3114 {
3115 swallow();
3116 while (1)
3117 {
3118 t = peek();
3040bf3a
AJ
3119 if (t && t->type == tok_operator && t->content == "*")
3120 {
45af9d1b
AJ
3121 swallow();
3122 s->array_slice.push_back (NULL);
3040bf3a
AJ
3123 }
3124 else
45af9d1b 3125 s->array_slice.push_back (parse_expression());
3040bf3a
AJ
3126
3127 t = peek ();
3128 if (t && t->type == tok_operator && t->content == ",")
3129 {
3130 swallow ();
3131 continue;
3132 }
3133 else if (t && t->type == tok_operator && t->content == "]")
3134 {
3135 swallow ();
3136 break;
3137 }
3138 else
3139 throw PARSE_ERROR (_("expected ',' or ']'"));
3140 }
3141 }
3142
3143
fd5689dc
FCE
3144 // check for atword, see also expect_ident_or_atword,
3145 t = peek ();
3146 if (t && t->type == tok_operator && t->content[0] == '@')
3147 {
3148 if (t->content == "@avg") s->sort_aggr = sc_average;
3149 else if (t->content == "@min") s->sort_aggr = sc_min;
3150 else if (t->content == "@max") s->sort_aggr = sc_max;
3151 else if (t->content == "@count") s->sort_aggr = sc_count;
3152 else if (t->content == "@sum") s->sort_aggr = sc_sum;
f0454224 3153 else throw PARSE_ERROR(_("expected statistical operation"));
fd5689dc
FCE
3154 swallow();
3155
3156 t = peek ();
3157 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
f0454224 3158 throw PARSE_ERROR(_("expected sort directive"));
fd5689dc
FCE
3159 }
3160
93484556
FCE
3161 t = peek ();
3162 if (t && t->type == tok_operator &&
3163 (t->content == "+" || t->content == "-"))
3164 {
3165 if (s->sort_direction)
f0454224 3166 throw PARSE_ERROR (_("multiple sort directives"));
93484556
FCE
3167 s->sort_direction = (t->content == "+") ? 1 : -1;
3168 s->sort_column = 0;
731a5359 3169 swallow ();
93484556
FCE
3170 }
3171
27f21e8c
DS
3172 t = peek ();
3173 if (tok_is(t, tok_keyword, "limit"))
3174 {
731a5359 3175 swallow (); // get past the "limit"
27f21e8c
DS
3176 s->limit = parse_expression ();
3177 }
3178
69c68955
FCE
3179 t = next ();
3180 if (! (t->type == tok_operator && t->content == ")"))
f0454224 3181 throw PARSE_ERROR ("expected ')'");
731a5359 3182 swallow ();
69c68955
FCE
3183
3184 s->block = parse_statement ();
3185 return s;
3186}
3187
3188
2f1a1aea
FCE
3189expression*
3190parser::parse_expression ()
3191{
3192 return parse_assignment ();
3193}
3194
2f1a1aea
FCE
3195
3196expression*
3197parser::parse_assignment ()
3198{
3199 expression* op1 = parse_ternary ();
3200
3201 const token* t = peek ();
82919855 3202 // right-associative operators
dff50e09 3203 if (t && t->type == tok_operator
2f1a1aea 3204 && (t->content == "=" ||
82919855 3205 t->content == "<<<" ||
2f1a1aea 3206 t->content == "+=" ||
bb2e3076
FCE
3207 t->content == "-=" ||
3208 t->content == "*=" ||
3209 t->content == "/=" ||
3210 t->content == "%=" ||
3211 t->content == "<<=" ||
3212 t->content == ">>=" ||
3213 t->content == "&=" ||
3214 t->content == "^=" ||
3215 t->content == "|=" ||
d5d7c2cc 3216 t->content == ".=" ||
dff50e09 3217 false))
2f1a1aea 3218 {
bb2e3076 3219 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 3220 assignment* e = new assignment;
56099f08 3221 e->left = op1;
47d349b1 3222 e->op = t->content;
56099f08 3223 e->tok = t;
2f1a1aea 3224 next ();
82919855 3225 e->right = parse_expression ();
56099f08 3226 op1 = e;
2f1a1aea 3227 }
56099f08
FCE
3228
3229 return op1;
2f1a1aea
FCE
3230}
3231
3232
3233expression*
3234parser::parse_ternary ()
3235{
3236 expression* op1 = parse_logical_or ();
3237
3238 const token* t = peek ();
3239 if (t && t->type == tok_operator && t->content == "?")
3240 {
2f1a1aea 3241 ternary_expression* e = new ternary_expression;
56099f08 3242 e->tok = t;
2f1a1aea 3243 e->cond = op1;
56099f08
FCE
3244 next ();
3245 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
3246
3247 t = next ();
3248 if (! (t->type == tok_operator && t->content == ":"))
f0454224 3249 throw PARSE_ERROR (_("expected ':'"));
731a5359 3250 swallow ();
2f1a1aea 3251
56099f08 3252 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
3253 return e;
3254 }
3255 else
3256 return op1;
3257}
3258
3259
3260expression*
3261parser::parse_logical_or ()
3262{
3263 expression* op1 = parse_logical_and ();
dff50e09 3264
2f1a1aea 3265 const token* t = peek ();
56099f08 3266 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 3267 {
2f1a1aea 3268 logical_or_expr* e = new logical_or_expr;
56099f08 3269 e->tok = t;
47d349b1 3270 e->op = t->content;
2f1a1aea 3271 e->left = op1;
56099f08
FCE
3272 next ();
3273 e->right = parse_logical_and ();
3274 op1 = e;
3275 t = peek ();
2f1a1aea 3276 }
56099f08
FCE
3277
3278 return op1;
2f1a1aea
FCE
3279}
3280
3281
3282expression*
3283parser::parse_logical_and ()
3284{
bb2e3076 3285 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
3286
3287 const token* t = peek ();
56099f08 3288 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 3289 {
2f1a1aea
FCE
3290 logical_and_expr *e = new logical_and_expr;
3291 e->left = op1;
47d349b1 3292 e->op = t->content;
56099f08
FCE
3293 e->tok = t;
3294 next ();
bb2e3076
FCE
3295 e->right = parse_boolean_or ();
3296 op1 = e;
3297 t = peek ();
3298 }
3299
3300 return op1;
3301}
3302
3303
3304expression*
3305parser::parse_boolean_or ()
3306{
3307 expression* op1 = parse_boolean_xor ();
3308
3309 const token* t = peek ();
3310 while (t && t->type == tok_operator && t->content == "|")
3311 {
3312 binary_expression* e = new binary_expression;
3313 e->left = op1;
47d349b1 3314 e->op = t->content;
bb2e3076
FCE
3315 e->tok = t;
3316 next ();
3317 e->right = parse_boolean_xor ();
3318 op1 = e;
3319 t = peek ();
3320 }
3321
3322 return op1;
3323}
3324
3325
3326expression*
3327parser::parse_boolean_xor ()
3328{
3329 expression* op1 = parse_boolean_and ();
3330
3331 const token* t = peek ();
3332 while (t && t->type == tok_operator && t->content == "^")
3333 {
3334 binary_expression* e = new binary_expression;
3335 e->left = op1;
47d349b1 3336 e->op = t->content;
bb2e3076
FCE
3337 e->tok = t;
3338 next ();
3339 e->right = parse_boolean_and ();
3340 op1 = e;
3341 t = peek ();
3342 }
3343
3344 return op1;
3345}
3346
3347
3348expression*
3349parser::parse_boolean_and ()
3350{
3351 expression* op1 = parse_array_in ();
3352
3353 const token* t = peek ();
3354 while (t && t->type == tok_operator && t->content == "&")
3355 {
3356 binary_expression* e = new binary_expression;
3357 e->left = op1;
47d349b1 3358 e->op = t->content;
bb2e3076
FCE
3359 e->tok = t;
3360 next ();
56099f08
FCE
3361 e->right = parse_array_in ();
3362 op1 = e;
3363 t = peek ();
2f1a1aea 3364 }
56099f08
FCE
3365
3366 return op1;
2f1a1aea
FCE
3367}
3368
3369
3370expression*
3371parser::parse_array_in ()
3372{
ce10591c 3373 // This is a very tricky case. All these are legit expressions:
69c68955 3374 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
3375 vector<expression*> indexes;
3376 bool parenthesized = false;
2f1a1aea
FCE
3377
3378 const token* t = peek ();
69c68955 3379 if (t && t->type == tok_operator && t->content == "[")
ce10591c 3380 {
731a5359 3381 swallow ();
ce10591c
FCE
3382 parenthesized = true;
3383 }
3384
3385 while (1)
3386 {
e225e273
AJ
3387 t = peek();
3388 if (t && t->type == tok_operator && t->content == "*" && parenthesized)
3389 {
45af9d1b
AJ
3390 swallow();
3391 indexes.push_back(NULL);
e225e273
AJ
3392 }
3393 else
3394 {
3395 expression* op1 = parse_comparison_or_regex_query ();
3396 indexes.push_back (op1);
3397 }
ce10591c
FCE
3398
3399 if (parenthesized)
3400 {
3401 const token* t = peek ();
3402 if (t && t->type == tok_operator && t->content == ",")
3403 {
731a5359 3404 swallow ();
ce10591c
FCE
3405 continue;
3406 }
69c68955 3407 else if (t && t->type == tok_operator && t->content == "]")
ce10591c 3408 {
731a5359 3409 swallow ();
ce10591c
FCE
3410 break;
3411 }
dff50e09 3412 else
f0454224 3413 throw PARSE_ERROR (_("expected ',' or ']'"));
ce10591c
FCE
3414 }
3415 else
3416 break; // expecting only one expression
3417 }
3418
3419 t = peek ();
6e213f58 3420 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 3421 {
2f1a1aea 3422 array_in *e = new array_in;
56099f08 3423 e->tok = t;
731a5359 3424 next ();
ce10591c
FCE
3425
3426 arrayindex* a = new arrayindex;
3427 a->indexes = indexes;
d02548c0 3428 a->base = parse_indexable();
d15d767c 3429 a->tok = a->base->tok;
ce10591c 3430 e->operand = a;
2f1a1aea
FCE
3431 return e;
3432 }
ce10591c
FCE
3433 else if (indexes.size() == 1) // no "in" - need one expression only
3434 return indexes[0];
2f1a1aea 3435 else
f0454224 3436 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
2f1a1aea
FCE
3437}
3438
3439
3440expression*
93daaca8 3441parser::parse_comparison_or_regex_query ()
2f1a1aea 3442{
bb2e3076 3443 expression* op1 = parse_shift ();
2f1a1aea 3444
557abe61 3445 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
93daaca8
SM
3446 const token *t = peek();
3447 if (t && t->type == tok_operator
3448 && (t->content == "=~" ||
3449 t->content == "!~"))
3450 {
3451 regex_query* r = new regex_query;
3452 r->left = op1;
47d349b1 3453 r->op = t->content;
93daaca8
SM
3454 r->tok = t;
3455 next ();
d3bc48f0 3456 r->right = parse_literal_string();
93daaca8
SM
3457 op1 = r;
3458 t = peek ();
3459 }
3460 else while (t && t->type == tok_operator
553d27a5
FCE
3461 && (t->content == ">" ||
3462 t->content == "<" ||
3463 t->content == "==" ||
3464 t->content == "!=" ||
3465 t->content == "<=" ||
bb2e3076 3466 t->content == ">="))
2f1a1aea
FCE
3467 {
3468 comparison* e = new comparison;
3469 e->left = op1;
47d349b1 3470 e->op = t->content;
56099f08 3471 e->tok = t;
2f1a1aea 3472 next ();
bb2e3076
FCE
3473 e->right = parse_shift ();
3474 op1 = e;
3475 t = peek ();
3476 }
3477
3478 return op1;
3479}
3480
3481
3482expression*
3483parser::parse_shift ()
3484{
3485 expression* op1 = parse_concatenation ();
3486
3487 const token* t = peek ();
dff50e09 3488 while (t && t->type == tok_operator &&
bb2e3076
FCE
3489 (t->content == "<<" || t->content == ">>"))
3490 {
3491 binary_expression* e = new binary_expression;
3492 e->left = op1;
47d349b1 3493 e->op = t->content;
bb2e3076
FCE
3494 e->tok = t;
3495 next ();
56099f08
FCE
3496 e->right = parse_concatenation ();
3497 op1 = e;
3498 t = peek ();
2f1a1aea 3499 }
56099f08
FCE
3500
3501 return op1;
2f1a1aea
FCE
3502}
3503
3504
3505expression*
3506parser::parse_concatenation ()
3507{
3508 expression* op1 = parse_additive ();
3509
3510 const token* t = peek ();
3511 // XXX: the actual awk string-concatenation operator is *whitespace*.
3512 // I don't know how to easily to model that here.
56099f08 3513 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
3514 {
3515 concatenation* e = new concatenation;
3516 e->left = op1;
47d349b1 3517 e->op = t->content;
56099f08 3518 e->tok = t;
2f1a1aea 3519 next ();
56099f08
FCE
3520 e->right = parse_additive ();
3521 op1 = e;
3522 t = peek ();
2f1a1aea 3523 }
56099f08
FCE
3524
3525 return op1;
2f1a1aea
FCE
3526}
3527
3528
3529expression*
3530parser::parse_additive ()
3531{
3532 expression* op1 = parse_multiplicative ();
3533
3534 const token* t = peek ();
dff50e09 3535 while (t && t->type == tok_operator
2f1a1aea
FCE
3536 && (t->content == "+" || t->content == "-"))
3537 {
3538 binary_expression* e = new binary_expression;
47d349b1 3539 e->op = t->content;
2f1a1aea 3540 e->left = op1;
56099f08 3541 e->tok = t;
2f1a1aea 3542 next ();
56099f08
FCE
3543 e->right = parse_multiplicative ();
3544 op1 = e;
3545 t = peek ();
2f1a1aea 3546 }
56099f08
FCE
3547
3548 return op1;
2f1a1aea
FCE
3549}
3550
3551
3552expression*
3553parser::parse_multiplicative ()
3554{
3555 expression* op1 = parse_unary ();
3556
3557 const token* t = peek ();
dff50e09 3558 while (t && t->type == tok_operator
2f1a1aea
FCE
3559 && (t->content == "*" || t->content == "/" || t->content == "%"))
3560 {
3561 binary_expression* e = new binary_expression;
47d349b1 3562 e->op = t->content;
2f1a1aea 3563 e->left = op1;
56099f08 3564 e->tok = t;
2f1a1aea 3565 next ();
56099f08
FCE
3566 e->right = parse_unary ();
3567 op1 = e;
3568 t = peek ();
2f1a1aea 3569 }
56099f08
FCE
3570
3571 return op1;
2f1a1aea
FCE
3572}
3573
3574
3575expression*
3576parser::parse_unary ()
3577{
3578 const token* t = peek ();
dff50e09
FCE
3579 if (t && t->type == tok_operator
3580 && (t->content == "+" ||
3581 t->content == "-" ||
bb2e3076
FCE
3582 t->content == "!" ||
3583 t->content == "~" ||
3584 false))
2f1a1aea
FCE
3585 {
3586 unary_expression* e = new unary_expression;
47d349b1 3587 e->op = t->content;
56099f08 3588 e->tok = t;
2f1a1aea 3589 next ();
1cb79a72 3590 e->operand = parse_unary ();
2f1a1aea
FCE
3591 return e;
3592 }
3593 else
bb2e3076 3594 return parse_crement ();
2f1a1aea
FCE
3595}
3596
3597
3598expression*
3599parser::parse_crement () // as in "increment" / "decrement"
3600{
cbfbbf69
FCE
3601 // NB: Ideally, we'd parse only a symbol as an operand to the
3602 // *crement operators, instead of a general expression value. We'd
3603 // need more complex lookahead code to tell apart the postfix cases.
3604 // So we just punt, and leave it to pass-3 to signal errors on
3605 // cases like "4++".
3606
2f1a1aea 3607 const token* t = peek ();
dff50e09 3608 if (t && t->type == tok_operator
2f1a1aea
FCE
3609 && (t->content == "++" || t->content == "--"))
3610 {
3611 pre_crement* e = new pre_crement;
47d349b1 3612 e->op = t->content;
56099f08 3613 e->tok = t;
2f1a1aea 3614 next ();
0fb0cac9 3615 e->operand = parse_dwarf_value ();
2f1a1aea
FCE
3616 return e;
3617 }
3618
3619 // post-crement or non-crement
0fb0cac9 3620 expression *op1 = parse_dwarf_value ();
dff50e09 3621
2f1a1aea 3622 t = peek ();
dff50e09 3623 if (t && t->type == tok_operator
2f1a1aea
FCE
3624 && (t->content == "++" || t->content == "--"))
3625 {
3626 post_crement* e = new post_crement;
47d349b1 3627 e->op = t->content;
56099f08 3628 e->tok = t;
2f1a1aea
FCE
3629 next ();
3630 e->operand = op1;
3631 return e;
3632 }
3633 else
3634 return op1;
3635}
3636
3637
0fb0cac9
JS
3638expression*
3639parser::parse_dwarf_value ()
3640{
3641 expression* expr = NULL;
3642 target_symbol* tsym = NULL;
3643
3644 // With '&' we'll definitely be making a target symbol of some sort
251707c8
JS
3645 const token* addrtok = peek_op ("&") ? next () : NULL;
3646 bool addressof = (addrtok != NULL);
0fb0cac9
JS
3647
3648 // First try target_symbol types: $var, @cast, and @var.
0fb0cac9
JS
3649 const token* t = peek ();
3650 if (t && t->type == tok_identifier && t->content[0] == '$')
3651 expr = tsym = parse_target_symbol ();
3652 else if (tok_is (t, tok_operator, "@cast"))
3653 expr = tsym = parse_cast_op ();
3654 else if (tok_is (t, tok_operator, "@var"))
3655 expr = tsym = parse_atvar_op ();
f8405ea5 3656 else if (addressof && !input.has_version("2.6"))
eff66d40
JS
3657 // '&' on old version only allowed specific target_symbol types
3658 throw PARSE_ERROR (_("expected @cast, @var or $var"));
0fb0cac9 3659 else
eff66d40 3660 // Otherwise just get a plain value of any sort.
0fb0cac9
JS
3661 expr = parse_value ();
3662
3663 // If we had '&' or see any target suffixes, that forces a target_symbol.
eff66d40
JS
3664 // For compatibility, we only do this starting with 2.6.
3665 if (!tsym && (addressof || peek_target_symbol_components ())
f8405ea5 3666 && input.has_version("2.6"))
0fb0cac9 3667 {
251707c8
JS
3668 autocast_op *cop = new autocast_op;
3669 cop->tok = addrtok ?: peek ();
0fb0cac9
JS
3670 cop->operand = expr;
3671 expr = tsym = cop;
3672 }
3673
3674 if (tsym)
3675 {
3676 // Parse the rest of any kind of target symbol
3677 tsym->addressof = addressof;
3678 parse_target_symbol_components (tsym);
3679 }
3680
3681 return expr;
3682}
3683
3684
2f1a1aea
FCE
3685expression*
3686parser::parse_value ()
3687{
3688 const token* t = peek ();
3689 if (! t)
f0454224 3690 throw PARSE_ERROR (_("expected value"));
2f1a1aea 3691
7d902887
FCE
3692 if (t->type == tok_embedded)
3693 {
7d902887 3694 if (! privileged)
f0454224 3695 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
7d902887
FCE
3696
3697 embedded_expr *e = new embedded_expr;
3698 e->tok = t;
47d349b1 3699 e->code = t->content;
731a5359 3700 next ();
7d902887
FCE
3701 return e;
3702 }
3703
2f1a1aea
FCE
3704 if (t->type == tok_operator && t->content == "(")
3705 {
731a5359 3706 swallow ();
2f1a1aea
FCE
3707 expression* e = parse_expression ();
3708 t = next ();
3709 if (! (t->type == tok_operator && t->content == ")"))
f0454224 3710 throw PARSE_ERROR (_("expected ')'"));
731a5359 3711 swallow ();
2f1a1aea
FCE
3712 return e;
3713 }
06219d6f
SM
3714 else if (t->type == tok_identifier
3715 || (t->type == tok_operator && t->content[0] == '@'))
2f1a1aea
FCE
3716 return parse_symbol ();
3717 else
3718 return parse_literal ();
3719}
3720
3721
d02548c0 3722const token *
b1f2b0e8 3723parser::parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name)
d02548c0
GH
3724{
3725 hop = NULL;
50cc7cd5 3726 const token* t = expect_ident_or_atword (name);
d02548c0
GH
3727 if (name == "@hist_linear" || name == "@hist_log")
3728 {
3729 hop = new hist_op;
3730 if (name == "@hist_linear")
3731 hop->htype = hist_linear;
3732 else if (name == "@hist_log")
3733 hop->htype = hist_log;
3734 hop->tok = t;
3735 expect_op("(");
3736 hop->stat = parse_expression ();
3737 int64_t tnum;
3738 if (hop->htype == hist_linear)
3739 {
3740 for (size_t i = 0; i < 3; ++i)
3741 {
3742 expect_op (",");
3743 expect_number (tnum);
3744 hop->params.push_back (tnum);
3745 }
3746 }
d02548c0
GH
3747 expect_op(")");
3748 }
3749 return t;
3750}
3751
3752
3753indexable*
3754parser::parse_indexable ()
3755{
3756 hist_op *hop = NULL;
b1f2b0e8 3757 interned_string name;
d02548c0
GH
3758 const token *tok = parse_hist_op_or_bare_name(hop, name);
3759 if (hop)
3760 return hop;
3761 else
3762 {
3763 symbol* sym = new symbol;
3764 sym->name = name;
3765 sym->tok = tok;
3766 return sym;
3767 }
3768}
3769
3770
0fb0cac9
JS
3771// var, indexable[index], func(parms), printf("...", ...),
3772// @defined, @entry, @stat_op(stat)
30263a73 3773expression* parser::parse_symbol ()
2f1a1aea 3774{
d02548c0
GH
3775 hist_op *hop = NULL;
3776 symbol *sym = NULL;
b1f2b0e8 3777 interned_string name;
d02548c0
GH
3778 const token *t = parse_hist_op_or_bare_name(hop, name);
3779
3780 if (!hop)
0fefb486 3781 {
dff50e09 3782 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0 3783 // now scrutinize this identifier for the various magic forms of identifier
0fb0cac9 3784 // (printf, @stat_op...)
9b5af295 3785
db135493
FCE
3786 // NB: PR11343: @defined() is not incompatible with earlier versions
3787 // of stap, so no need to check session.compatible for 1.2
30263a73
FCE
3788 if (name == "@defined")
3789 return parse_defined_op (t);
8cc799a5 3790
0a7eb12d
MC
3791 if (name == "@const")
3792 return parse_const_op (t);
3793
8cc799a5
JS
3794 if (name == "@entry")
3795 return parse_entry_op (t);
3796
3689db05
SC
3797 if (name == "@perf")
3798 return parse_perf_op (t);
3799
cc9001af 3800 if (name.size() > 0 && name[0] == '@')
d7f3e0c5 3801 {
d02548c0
GH
3802 stat_op *sop = new stat_op;
3803 if (name == "@avg")
3804 sop->ctype = sc_average;
3805 else if (name == "@count")
3806 sop->ctype = sc_count;
3807 else if (name == "@sum")
3808 sop->ctype = sc_sum;
3809 else if (name == "@min")
3810 sop->ctype = sc_min;
3811 else if (name == "@max")
3812 sop->ctype = sc_max;
3813 else
b1f2b0e8
JS
3814 throw PARSE_ERROR(_F("unknown operator %s",
3815 name.to_string().c_str()));
d02548c0
GH
3816 expect_op("(");
3817 sop->tok = t;
3818 sop->stat = parse_expression ();
3819 expect_op(")");
3820 return sop;
3821 }
dff50e09 3822
d5e178c1 3823 else if (print_format *fmt = print_format::create(t))
d02548c0 3824 {
d02548c0 3825 expect_op("(");
b15c465c
PP
3826 if ((name == "print" || name == "println" ||
3827 name == "sprint" || name == "sprintln") &&
f34254da 3828 (peek_op("@hist_linear") || peek_op("@hist_log")))
a4636912
GH
3829 {
3830 // We have a special case where we recognize
3831 // print(@hist_foo(bar)) as a magic print-the-histogram
3832 // construct. This is sort of gross but it avoids
3833 // promoting histogram references to typeful
3834 // expressions.
dff50e09 3835
1bbeef03
GH
3836 hop = NULL;
3837 t = parse_hist_op_or_bare_name(hop, name);
3838 assert(hop);
dff50e09 3839
1bbeef03
GH
3840 // It is, sadly, possible that even while parsing a
3841 // hist_op, we *mis-guessed* and the user wishes to
3842 // print(@hist_op(foo)[bucket]), a scalar. In that case
3843 // we must parse the arrayindex and print an expression.
839325a1
JS
3844 //
3845 // XXX: This still fails if the arrayindex is part of a
3846 // larger expression. To really handle everything, we'd
3847 // need to push back all the hist tokens start over.
dff50e09 3848
1bbeef03
GH
3849 if (!peek_op ("["))
3850 fmt->hist = hop;
3851 else
3852 {
3853 // This is simplified version of the
3854 // multi-array-index parser below, because we can
3855 // only ever have one index on a histogram anyways.
3856 expect_op("[");
3857 struct arrayindex* ai = new arrayindex;
3858 ai->tok = t;
3859 ai->base = hop;
3860 ai->indexes.push_back (parse_expression ());
3861 expect_op("]");
3862 fmt->args.push_back(ai);
839325a1
JS
3863
3864 // Consume any subsequent arguments.
1efdc9a9 3865 while (expect_op_any ({",", ")"}) != ")")
839325a1 3866 {
839325a1
JS
3867 expression *e = parse_expression ();
3868 fmt->args.push_back(e);
3869 }
1bbeef03 3870 }
a4636912 3871 }
d7f3e0c5 3872 else
d02548c0 3873 {
3cb17058 3874 int min_args = 0;
80cb29eb 3875 bool consumed_arg = false;
3cb17058
JS
3876 if (fmt->print_with_format)
3877 {
3878 // Consume and convert a format string. Agreement between the
3879 // format string and the arguments is postponed to the
3880 // typechecking phase.
c92d3b42 3881 literal_string* ls = parse_literal_string();
47d349b1 3882 fmt->raw_components = ls->value;
c92d3b42
FCE
3883 delete ls;
3884 fmt->components = print_format::string_to_components (fmt->raw_components);
80cb29eb 3885 consumed_arg = true;
3cb17058
JS
3886 }
3887 else if (fmt->print_with_delim)
3888 {
3889 // Consume a delimiter to separate arguments.
c92d3b42 3890 literal_string* ls = parse_literal_string();
d70e3afe 3891 fmt->delimiter = ls->value;
c92d3b42 3892 delete ls;
80cb29eb
JL
3893 consumed_arg = true;
3894 min_args = 2; // so that the delim is used at least once
3cb17058 3895 }
80cb29eb 3896 else if (!fmt->print_with_newline)
3cb17058 3897 {
80cb29eb
JL
3898 // If we are not printing with a format string, nor with a
3899 // delim, nor with a newline, then it's either print() or
3900 // sprint(), both of which require at least one argument (of
3901 // any type).
3902 min_args = 1;
3cb17058
JS
3903 }
3904
3905 // Consume any subsequent arguments.
3906 while (min_args || !peek_op (")"))
3907 {
f0a49a41
FL
3908 // ')' is not possible here but we want to output a nicer
3909 // parser error message.
80cb29eb 3910 if (consumed_arg)
1efdc9a9 3911 (void) expect_op_any({",", ")"});
3cb17058
JS
3912 expression *e = parse_expression ();
3913 fmt->args.push_back(e);
80cb29eb 3914 consumed_arg = true;
3cb17058
JS
3915 if (min_args)
3916 --min_args;
3917 }
1efdc9a9 3918 expect_op(")");
d02548c0 3919 }
d02548c0
GH
3920 return fmt;
3921 }
dff50e09 3922
d02548c0
GH
3923 else if (peek_op ("(")) // function call
3924 {
731a5359 3925 swallow ();
d02548c0
GH
3926 struct functioncall* f = new functioncall;
3927 f->tok = t;
3928 f->function = name;
3929 // Allow empty actual parameter list
3930 if (peek_op (")"))
3931 {
731a5359 3932 swallow ();
d02548c0
GH
3933 return f;
3934 }
3935 while (1)
3936 {
3937 f->args.push_back (parse_expression ());
1efdc9a9
FL
3938 interned_string op = expect_op_any({")", ","});
3939 if (op == ")")
3940 break;
3941 else if (op == ",")
3942 continue;
d02548c0
GH
3943 }
3944 return f;
3945 }
3946
3947 else
3948 {
3949 sym = new symbol;
3950 sym->name = name;
3951 sym->tok = t;
d7f3e0c5 3952 }
0fefb486 3953 }
dff50e09
FCE
3954
3955 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
3956 // we had a plain word and it was converted to a symbol.
3957
70c743d8 3958 assert (!hop != !sym); // logical XOR
d02548c0
GH
3959
3960 // All that remains is to check for array indexing
3961
d7f3e0c5 3962 if (peek_op ("[")) // array
2f1a1aea 3963 {
731a5359 3964 swallow ();
2f1a1aea 3965 struct arrayindex* ai = new arrayindex;
d02548c0
GH
3966 ai->tok = t;
3967
3968 if (hop)
3969 ai->base = hop;
3970 else
3971 ai->base = sym;
3972
2f1a1aea
FCE
3973 while (1)
3974 {
a98c930b
AJ
3975 if (peek_op("*"))
3976 {
45af9d1b
AJ
3977 swallow();
3978 ai->indexes.push_back (NULL);
a98c930b
AJ
3979 }
3980 else
3981 ai->indexes.push_back (parse_expression ());
1efdc9a9
FL
3982 interned_string op = expect_op_any({"]", ","});
3983 if (op == "]")
3984 break;
616c2df5 3985 else if (op == ",")
1efdc9a9 3986 continue;
2f1a1aea 3987 }
0fb0cac9 3988
2f1a1aea
FCE
3989 return ai;
3990 }
d02548c0
GH
3991
3992 // If we got to here, we *should* have a symbol; if we have
3993 // a hist_op on its own, it doesn't count as an expression,
3994 // so we throw a parse error.
3995
3996 if (hop)
f0454224 3997 throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
dff50e09
FCE
3998
3999 return sym;
2f1a1aea 4000}
56099f08 4001
0fb0cac9
JS
4002// Parse a $var.
4003target_symbol* parser::parse_target_symbol ()
30263a73 4004{
0fb0cac9
JS
4005 const token* t = next ();
4006 if (t->type == tok_identifier && t->content[0]=='$')
d48afc20 4007 {
0fb0cac9
JS
4008 // target_symbol time
4009 target_symbol *tsym = new target_symbol;
4010 tsym->tok = t;
a3e980f9 4011 tsym->name = t->content;
0fb0cac9 4012 return tsym;
d48afc20
JS
4013 }
4014
0fb0cac9
JS
4015 throw PARSE_ERROR (_("expected $var"));
4016}
4017
4018
4019// Parse a @cast.
4020cast_op* parser::parse_cast_op ()
4021{
4022 const token* t = next ();
06219d6f 4023 if (t->type == tok_operator && t->content == "@cast")
30263a73
FCE
4024 {
4025 cast_op *cop = new cast_op;
4026 cop->tok = t;
a3e980f9 4027 cop->name = t->content;
30263a73
FCE
4028 expect_op("(");
4029 cop->operand = parse_expression ();
4030 expect_op(",");
7f6b80bd 4031 expect_unknown(tok_string, cop->type_name);
0fb0cac9
JS
4032 if (cop->type_name.empty())
4033 throw PARSE_ERROR (_("expected non-empty string"));
30263a73
FCE
4034 if (peek_op (","))
4035 {
731a5359 4036 swallow ();
30263a73
FCE
4037 expect_unknown(tok_string, cop->module);
4038 }
4039 expect_op(")");
30263a73
FCE
4040 return cop;
4041 }
4042
0fb0cac9
JS
4043 throw PARSE_ERROR (_("expected @cast"));
4044}
4045
30263a73 4046
0fb0cac9
JS
4047// Parse a @var.
4048atvar_op* parser::parse_atvar_op ()
4049{
4050 const token* t = next ();
06219d6f 4051 if (t->type == tok_operator && t->content == "@var")
cc9001af 4052 {
bd1fcbad
YZ
4053 atvar_op *aop = new atvar_op;
4054 aop->tok = t;
a3e980f9 4055 aop->name = t->content;
cc9001af 4056 expect_op("(");
bd1fcbad
YZ
4057 expect_unknown(tok_string, aop->target_name);
4058 size_t found_at = aop->target_name.find("@");
bfa7e523 4059 if (found_at != string::npos)
bd1fcbad 4060 aop->cu_name = aop->target_name.substr(found_at + 1);
bfa7e523 4061 else
bd1fcbad
YZ
4062 aop->cu_name = "";
4063 if (peek_op (","))
4064 {
4065 swallow ();
4066 expect_unknown (tok_string, aop->module);
4067 }
4068 else
4069 aop->module = "";
cc9001af 4070 expect_op(")");
bd1fcbad 4071 return aop;
cc9001af
MW
4072 }
4073
0fb0cac9 4074 throw PARSE_ERROR (_("expected @var"));
30263a73
FCE
4075}
4076
4077
4078// Parse a @defined(). Given head token has already been consumed.
4079expression* parser::parse_defined_op (const token* t)
4080{
4081 defined_op* dop = new defined_op;
4082 dop->tok = t;
4083 expect_op("(");
0fb0cac9 4084 dop->operand = parse_expression ();
30263a73
FCE
4085 expect_op(")");
4086 return dop;
4087}
4088
4089
0a7eb12d
MC
4090// Parse a @const(). Given head token has already been consumed.
4091expression* parser::parse_const_op (const token* t)
4092{
4093 if (! privileged)
4094 throw PARSE_ERROR (_("using @const operator not permitted; need stap -g"),
4095 false /* don't skip tokens for parse resumption */);
4096
a7b0fd27
MC
4097 interned_string cnst;
4098 embedded_expr *ee = new embedded_expr;
4099 ee->tok = t;
0a7eb12d 4100 expect_op("(");
a7b0fd27
MC
4101 expect_unknown(tok_string, cnst);
4102 if(cnst.empty())
0a7eb12d
MC
4103 throw PARSE_ERROR (_("expected non-empty string"));
4104 expect_op(")");
a7b0fd27
MC
4105 ee->code = string("/* pure */ /* unprivileged */ /* stable */ ") + string(cnst);
4106 return ee;
0a7eb12d
MC
4107}
4108
4109
8cc799a5
JS
4110// Parse a @entry(). Given head token has already been consumed.
4111expression* parser::parse_entry_op (const token* t)
4112{
4113 entry_op* eop = new entry_op;
4114 eop->tok = t;
4115 expect_op("(");
4116 eop->operand = parse_expression ();
4117 expect_op(")");
4118 return eop;
4119}
4120
4121
3689db05
SC
4122// Parse a @perf(). Given head token has already been consumed.
4123expression* parser::parse_perf_op (const token* t)
4124{
4125 perf_op* pop = new perf_op;
4126 pop->tok = t;
4127 expect_op("(");
ace7c23f
FCE
4128 pop->operand = parse_literal_string ();
4129 if (pop->operand->value == "")
f0454224 4130 throw PARSE_ERROR (_("expected non-empty string"));
3689db05
SC
4131 expect_op(")");
4132 return pop;
4133}
4134
4135
0fb0cac9
JS
4136bool
4137parser::peek_target_symbol_components ()
4138{
3ddcf938
JS
4139 const token * t = peek ();
4140 return t &&
4141 ((t->type == tok_operator && (t->content == "->" || t->content == "["))
4142 || (t->type == tok_identifier &&
4143 t->content.find_first_not_of('$') == string::npos));
0fb0cac9 4144}
30263a73 4145
81931eab
JS
4146void
4147parser::parse_target_symbol_components (target_symbol* e)
4148{
5f36109e
JS
4149 bool pprint = false;
4150
4151 // check for pretty-print in the form $foo$
47d349b1 4152 string base = e->name;
5f36109e
JS
4153 size_t pprint_pos = base.find_last_not_of('$');
4154 if (0 < pprint_pos && pprint_pos < base.length() - 1)
4155 {
4156 string pprint_val = base.substr(pprint_pos + 1);
4157 base.erase(pprint_pos + 1);
47d349b1 4158 e->name = base;
5f36109e
JS
4159 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
4160 pprint = true;
4161 }
4162
4163 while (!pprint)
81931eab 4164 {
81931eab
JS
4165 if (peek_op ("->"))
4166 {
c67847a0 4167 const token* t = next();
b1f2b0e8 4168 interned_string member;
c67847a0 4169 expect_ident_or_keyword (member);
5f36109e
JS
4170
4171 // check for pretty-print in the form $foo->$ or $foo->bar$
4172 pprint_pos = member.find_last_not_of('$');
b1f2b0e8 4173 interned_string pprint_val;
5f36109e
JS
4174 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
4175 {
4176 pprint_val = member.substr(pprint_pos + 1);
b1f2b0e8 4177 member = member.substr(0, pprint_pos + 1);
5f36109e
JS
4178 pprint = true;
4179 }
4180
4181 if (!member.empty())
4182 e->components.push_back (target_symbol::component(t, member));
4183 if (pprint)
4184 e->components.push_back (target_symbol::component(t, pprint_val, true));
81931eab
JS
4185 }
4186 else if (peek_op ("["))
4187 {
c67847a0 4188 const token* t = next();
6fda2dff
JS
4189 expression* index = parse_expression();
4190 literal_number* ln = dynamic_cast<literal_number*>(index);
4191 if (ln)
4192 e->components.push_back (target_symbol::component(t, ln->value));
4193 else
4194 e->components.push_back (target_symbol::component(t, index));
81931eab 4195 expect_op ("]");
81931eab
JS
4196 }
4197 else
4198 break;
4199 }
5f36109e
JS
4200
4201 if (!pprint)
4202 {
4203 // check for pretty-print in the form $foo $
4204 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4205 const token* t = peek();
3819d181 4206 if (t != NULL && t->type == tok_identifier &&
5f36109e
JS
4207 t->content.find_first_not_of('$') == string::npos)
4208 {
4209 t = next();
47d349b1 4210 e->components.push_back (target_symbol::component(t, t->content, true));
5f36109e
JS
4211 pprint = true;
4212 }
4213 }
4214
4215 if (pprint && (peek_op ("->") || peek_op("[")))
f0454224 4216 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
81931eab
JS
4217}
4218
73267b89 4219/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.754951 seconds and 5 git commands to generate.