]> sourceware.org Git - systemtap.git/blame - parse.cxx
stapbpf PR22330 fixes :: identify format types of pe_unknown arguments
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
73fcca6f 2// Copyright (C) 2005-2018 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
92585d32 5// Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
69c68955
FCE
6//
7// This file is part of systemtap, and is free software. You can
8// redistribute it and/or modify it under the terms of the GNU General
9// Public License (GPL); either version 2, or (at your option) any
10// later version.
2f1a1aea 11
2b066ec1 12#include "config.h"
2f1a1aea
FCE
13#include "staptree.h"
14#include "parse.h"
177a8ead 15#include "session.h"
3f99432c 16#include "util.h"
aa389a19 17#include "stringtable.h"
3f99432c 18
2b066ec1 19#include <iostream>
eacb10ce 20
2b066ec1 21#include <fstream>
2f1a1aea 22#include <cctype>
9c0c0e46 23#include <cstdlib>
29e64872 24#include <cassert>
9c0c0e46
FCE
25#include <cerrno>
26#include <climits>
57b73400 27#include <sstream>
f74fb737 28#include <cstring>
3f99432c 29#include <cctype>
eacb10ce 30#include <iterator>
5d46f7cb 31#include <unordered_set>
eacb10ce 32
7a468d68
FCE
33extern "C" {
34#include <fnmatch.h>
35}
2f1a1aea
FCE
36
37using namespace std;
38
c18f07f8 39
4e0d0027 40class parser;
c18f07f8
JS
41class lexer
42{
43public:
fee28e5c 44 bool ate_comment; // current token follows a comment
b5477cd9 45 bool ate_whitespace; // the most recent token followed whitespace
534aad8b 46 bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
f8405ea5 47 bool check_compatible; // whether to gate features on session.compatible
534aad8b 48
b5477cd9 49 token* scan ();
8518e54f 50 lexer (istream&, const string&, systemtap_session&, bool);
c18f07f8 51 void set_current_file (stapfile* f);
101b0805 52 void set_current_token_chain (const token* tok);
f8405ea5 53 inline bool has_version (const char* v) const;
c18f07f8 54
5775f11f 55 unordered_set<interned_string> keywords;
85c97fc2 56 static unordered_set<string> atwords;
c18f07f8
JS
57private:
58 inline int input_get ();
59 inline int input_peek (unsigned n=0);
aa389a19 60 void input_put (const string&, const token*);
c18f07f8 61 string input_name;
47d349b1 62 string input_contents; // NB: being a temporary, no need to interned_string optimize this object
aa389a19 63 const char *input_pointer; // index into input_contents; NB: recompute if input_contents changed!
c18f07f8 64 const char *input_end;
aa389a19
FCE
65 unsigned cursor_suspend_count;
66 unsigned cursor_suspend_line;
67 unsigned cursor_suspend_column;
c18f07f8
JS
68 unsigned cursor_line;
69 unsigned cursor_column;
70 systemtap_session& session;
71 stapfile* current_file;
101b0805 72 const token* current_token_chain;
c18f07f8
JS
73};
74
75
76class parser
77{
78public:
f8405ea5 79 parser (systemtap_session& s, const string& n, istream& i, unsigned flags=0);
c18f07f8
JS
80 ~parser ();
81
f8405ea5
JS
82 stapfile* parse ();
83 probe* parse_synthetic_probe (const token* chain);
84 stapfile* parse_library_macros ();
c18f07f8
JS
85
86private:
87 typedef enum {
88 PP_NONE,
89 PP_KEEP_THEN,
90 PP_SKIP_THEN,
91 PP_KEEP_ELSE,
92 PP_SKIP_ELSE,
93 } pp_state_t;
94
534aad8b
SM
95 struct pp1_activation;
96
fe410f52
SM
97 struct pp_macrodecl : public macrodecl {
98 pp1_activation* parent_act; // used for param bindings
99 virtual bool is_closure() { return parent_act != 0; }
100 pp_macrodecl () : macrodecl(), parent_act(0) { }
534aad8b
SM
101 };
102
c18f07f8
JS
103 systemtap_session& session;
104 string input_name;
c18f07f8 105 lexer input;
f8405ea5 106 bool errs_as_warnings;
c18f07f8 107 bool privileged;
7b5b30a8 108 bool user_file;
e8b46a9e 109 bool auto_path;
c18f07f8
JS
110 parse_context context;
111
534aad8b
SM
112 // preprocessing subordinate, first pass (macros)
113 struct pp1_activation {
114 const token* tok;
115 unsigned cursor; // position within macro body
116 map<string, pp_macrodecl*> params;
534aad8b 117
fe410f52 118 macrodecl* curr_macro;
534aad8b 119
bdf7707b
JS
120 pp1_activation (const token* tok, macrodecl* curr_macro)
121 : tok(tok), cursor(0), curr_macro(curr_macro) { }
534aad8b
SM
122 ~pp1_activation ();
123 };
124
fe410f52 125 map<string, macrodecl*> pp1_namespace;
534aad8b
SM
126 vector<pp1_activation*> pp1_state;
127 const token* next_pp1 ();
ed891cf3 128 const token* scan_pp1 (bool ignore_macros);
534aad8b
SM
129 const token* slurp_pp1_param (vector<const token*>& param);
130 const token* slurp_pp1_body (vector<const token*>& body);
131
132 // preprocessing subordinate, final pass (conditionals)
c18f07f8 133 vector<pair<const token*, pp_state_t> > pp_state;
b5477cd9 134 const token* scan_pp ();
c18f07f8
JS
135 const token* skip_pp ();
136
137 // scanning state
b5477cd9
SM
138 const token* next ();
139 const token* peek ();
c18f07f8 140
731a5359
MW
141 // Advance past and throw away current token after peek () or next ().
142 void swallow ();
143
a07a2c28 144 const token* systemtap_v_seen;
c18f07f8
JS
145 const token* last_t; // the last value returned by peek() or next()
146 const token* next_t; // lookahead token
147
731a5359
MW
148 // expectations, these swallow the token
149 void expect_known (token_type tt, string const & expected);
47d349b1 150 void expect_unknown (token_type tt, interned_string & target);
47d349b1 151 void expect_unknown2 (token_type tt1, token_type tt2, interned_string & target);
731a5359
MW
152
153 // convenience forms, these also swallow the token
154 void expect_op (string const & expected);
891b96e6 155 interned_string expect_op_any (initializer_list<const char*> expected);
731a5359
MW
156 void expect_kw (string const & expected);
157 void expect_number (int64_t & expected);
b1f2b0e8 158 void expect_ident_or_keyword (interned_string & target);
731a5359
MW
159
160 // convenience forms, which return true or false, these don't swallow token
c18f07f8
JS
161 bool peek_op (string const & op);
162 bool peek_kw (string const & kw);
163
731a5359
MW
164 // convenience forms, which return the token
165 const token* expect_kw_token (string const & expected);
b1f2b0e8 166 const token* expect_ident_or_atword (interned_string & target);
731a5359 167
7ac01ea0 168 void print_error (const parse_error& pe, bool errs_as_warnings = false);
c18f07f8
JS
169 unsigned num_errors;
170
171private: // nonterminals
172 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
f41e297c
JS
173 void parse_private (vector<vardecl*>&, vector<probe*>&,
174 string const&, vector<functiondecl*>&);
175 void parse_global (vector<vardecl*>&, vector<probe*>&,
176 string const&);
177 void do_parse_global (vector<vardecl*>&, vector<probe*>&,
178 string const&, const token*, bool);
179 void parse_functiondecl (vector<functiondecl*>&, string const&);
180 void do_parse_functiondecl (vector<functiondecl*>&, const token*,
181 string const&, bool);
c18f07f8 182 embeddedcode* parse_embeddedcode ();
05f925e9
FL
183 vector<probe_point*> parse_probe_points ();
184 vector<probe_point*> parse_components ();
185 vector<probe_point*> parse_component ();
d24f1ff4
SM
186 literal_string* consume_string_literals (const token*);
187 literal_string* parse_literal_string ();
c18f07f8
JS
188 literal* parse_literal ();
189 block* parse_stmt_block ();
190 try_block* parse_try_block ();
191 statement* parse_statement ();
192 if_statement* parse_if_statement ();
193 for_loop* parse_for_loop ();
194 for_loop* parse_while_loop ();
195 foreach_loop* parse_foreach_loop ();
196 expr_statement* parse_expr_statement ();
197 return_statement* parse_return_statement ();
198 delete_statement* parse_delete_statement ();
199 next_statement* parse_next_statement ();
200 break_statement* parse_break_statement ();
201 continue_statement* parse_continue_statement ();
202 indexable* parse_indexable ();
b1f2b0e8 203 const token *parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name);
0fb0cac9
JS
204 target_symbol *parse_target_symbol ();
205 cast_op *parse_cast_op ();
206 atvar_op *parse_atvar_op ();
8cc799a5 207 expression* parse_entry_op (const token* t);
c18f07f8 208 expression* parse_defined_op (const token* t);
0a7eb12d 209 expression* parse_const_op (const token* t);
3689db05 210 expression* parse_perf_op (const token* t);
4c2e691d
JU
211 expression* parse_target_register (const token* t);
212 expression* parse_target_deref (const token* t);
c18f07f8
JS
213 expression* parse_expression ();
214 expression* parse_assignment ();
215 expression* parse_ternary ();
216 expression* parse_logical_or ();
217 expression* parse_logical_and ();
218 expression* parse_boolean_or ();
219 expression* parse_boolean_xor ();
220 expression* parse_boolean_and ();
221 expression* parse_array_in ();
93daaca8 222 expression* parse_comparison_or_regex_query ();
c18f07f8
JS
223 expression* parse_shift ();
224 expression* parse_concatenation ();
225 expression* parse_additive ();
226 expression* parse_multiplicative ();
227 expression* parse_unary ();
228 expression* parse_crement ();
0fb0cac9 229 expression* parse_dwarf_value ();
c18f07f8
JS
230 expression* parse_value ();
231 expression* parse_symbol ();
232
0fb0cac9 233 bool peek_target_symbol_components ();
c18f07f8
JS
234 void parse_target_symbol_components (target_symbol* e);
235};
236
237
2f1a1aea
FCE
238// ------------------------------------------------------------------------
239
c18f07f8 240stapfile*
ba48c27a 241parse (systemtap_session& s, const string& n, istream& i, unsigned flags)
c18f07f8 242{
ba48c27a 243 parser p (s, n, i, flags);
f8405ea5 244 return p.parse ();
c18f07f8
JS
245}
246
c18f07f8 247stapfile*
f8405ea5 248parse (systemtap_session& s, const string& name, unsigned flags)
c18f07f8 249{
4cd32d8c
JS
250 ifstream i(name.c_str(), ios::in);
251 if (i.fail())
252 {
253 cerr << (file_exists(name)
254 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
255 : _F("Input file '%s' is missing.", name.c_str()))
256 << endl;
257 return 0;
258 }
259
f8405ea5
JS
260 parser p (s, name, i, flags);
261 return p.parse ();
c18f07f8
JS
262}
263
fe410f52 264stapfile*
f8405ea5 265parse_library_macros (systemtap_session& s, const string& name)
fe410f52
SM
266{
267 ifstream i(name.c_str(), ios::in);
268 if (i.fail())
269 {
270 cerr << (file_exists(name)
271 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
272 : _F("Input file '%s' is missing.", name.c_str()))
273 << endl;
274 return 0;
275 }
276
f8405ea5
JS
277 parser p (s, name, i);
278 return p.parse_library_macros ();
fe410f52
SM
279}
280
101b0805 281probe*
aa389a19 282parse_synthetic_probe (systemtap_session &s, istream& i, const token* tok)
101b0805 283{
d026d78c 284 parser p (s, tok ? tok->location.file->name : "<synthetic>", i);
f8405ea5 285 return p.parse_synthetic_probe (tok);
101b0805
JS
286}
287
c18f07f8 288// ------------------------------------------------------------------------
bb2e3076 289
f8405ea5 290parser::parser (systemtap_session& s, const string &n, istream& i, unsigned flags):
8518e54f 291 session (s), input_name (n), input (i, input_name, s, !(flags & pf_no_compatible)),
f8405ea5 292 errs_as_warnings(flags & pf_squash_errors), privileged (flags & pf_guru),
e8b46a9e
FL
293 user_file (flags & pf_user_file), auto_path (flags & pf_auto_path),
294 context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
4cd32d8c
JS
295{
296}
2f1a1aea
FCE
297
298parser::~parser()
299{
2f1a1aea
FCE
300}
301
d7f3e0c5
GH
302static string
303tt2str(token_type tt)
304{
305 switch (tt)
306 {
307 case tok_junk: return "junk";
308 case tok_identifier: return "identifier";
309 case tok_operator: return "operator";
310 case tok_string: return "string";
311 case tok_number: return "number";
312 case tok_embedded: return "embedded-code";
6e213f58 313 case tok_keyword: return "keyword";
d7f3e0c5
GH
314 }
315 return "unknown token";
316}
82919855 317
0323ed4d
WC
318ostream&
319operator << (ostream& o, const source_loc& loc)
320{
a704a23b 321 o << loc.file->name << ":"
0323ed4d
WC
322 << loc.line << ":"
323 << loc.column;
324
325 return o;
326}
327
56099f08
FCE
328ostream&
329operator << (ostream& o, const token& t)
330{
d7f3e0c5 331 o << tt2str(t.type);
56099f08 332
6e213f58 333 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 334 {
24cb178f
FCE
335 o << " '";
336 for (unsigned i=0; i<t.content.length(); i++)
337 {
338 char c = t.content[i];
339 o << (isprint (c) ? c : '?');
340 }
341 o << "'";
56099f08 342 }
56099f08 343
dff50e09 344 o << " at "
0323ed4d 345 << t.location;
56099f08
FCE
346
347 return o;
348}
349
350
dff50e09 351void
7ac01ea0 352parser::print_error (const parse_error &pe, bool errs_as_warnings)
2f1a1aea 353{
16fc963f 354 const token *tok = pe.tok ? pe.tok : last_t;
7ac01ea0 355 session.print_error(pe, tok, input_name, errs_as_warnings);
2f1a1aea
FCE
356 num_errors ++;
357}
358
359
2f1a1aea 360
c434ec7e
FCE
361
362template <typename OPERAND>
363bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
364{
365 if (op->type == tok_operator && op->content == "<=")
366 { return lhs <= rhs; }
367 else if (op->type == tok_operator && op->content == ">=")
368 { return lhs >= rhs; }
369 else if (op->type == tok_operator && op->content == "<")
370 { return lhs < rhs; }
371 else if (op->type == tok_operator && op->content == ">")
372 { return lhs > rhs; }
373 else if (op->type == tok_operator && op->content == "==")
374 { return lhs == rhs; }
375 else if (op->type == tok_operator && op->content == "!=")
376 { return lhs != rhs; }
377 else
f0454224 378 throw PARSE_ERROR (_("expected comparison operator"), op);
c434ec7e
FCE
379}
380
381
534aad8b
SM
// Here, we perform on-the-fly preprocessing in two passes.

// First pass - macro declaration and expansion.
//
// The basic form of a declaration is @define SIGNATURE %( BODY %)
// where SIGNATURE is of the form macro_name (a, b, c, ...)
// and BODY can obtain the parameter contents as @a, @b, @c, ....
// Note that parameterless macros can also be declared.
//
// Macro definitions may not be nested.
// A macro is available textually after it has been defined.
//
// The basic form of a macro invocation
//   for a parameterless macro is @macro_name,
//   for a macro with parameters is @macro_name(param_1, param_2, ...).
//
// NB: this means that a parameterless macro @foo called as @foo(a, b, c)
// leaves its 'parameters' alone, rather than consuming them to result
// in a "too many parameters" error. This may be useful in the unusual
// case of wanting @foo to expand to the name of a function.
//
// Invocations of unknown macros are left unexpanded, to allow
// the continued use of constructs such as @cast, @var, etc.
fe410f52 406macrodecl::~macrodecl ()
534aad8b
SM
407{
408 delete tok;
409 for (vector<const token*>::iterator it = body.begin();
410 it != body.end(); it++)
411 delete *it;
412}
413
414parser::pp1_activation::~pp1_activation ()
415{
416 delete tok;
fe410f52 417 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
534aad8b
SM
418 for (map<string, pp_macrodecl*>::iterator it = params.begin();
419 it != params.end(); it++)
420 delete it->second;
421}
422
423// Grab a token from the current input source (main file or macro body):
424const token*
425parser::next_pp1 ()
426{
427 if (pp1_state.empty())
428 return input.scan ();
429
430 // otherwise, we're inside a macro
431 pp1_activation* act = pp1_state.back();
432 unsigned& cursor = act->cursor;
433 if (cursor < act->curr_macro->body.size())
434 {
435 token* t = new token(*act->curr_macro->body[cursor]);
0002fc51 436 t->chain = new token(*act->tok); // mark chained token
534aad8b
SM
437 cursor++;
438 return t;
439 }
440 else
441 return 0; // reached end of macro body
442}
443
444const token*
ed891cf3 445parser::scan_pp1 (bool ignore_macros = false)
534aad8b
SM
446{
447 while (true)
448 {
449 const token* t = next_pp1 ();
450 if (t == 0) // EOF or end of macro body
451 {
452 if (pp1_state.empty()) // actual EOF
453 return 0;
454
455 // Exit macro and loop around to look for the next token.
456 pp1_activation* act = pp1_state.back();
457 pp1_state.pop_back(); delete act;
458 continue;
459 }
460
461 // macro definition
ed891cf3
MC
462 // PR18462 don't catalog preprocessor-disabled macros
463 if (t->type == tok_operator && t->content == "@define" && !ignore_macros)
534aad8b
SM
464 {
465 if (!pp1_state.empty())
f0454224 466 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
534aad8b
SM
467 delete t;
468
469 // handle macro definition
470 // (1) consume macro signature
3932c705 471 t = input.scan();
534aad8b 472 if (! (t && t->type == tok_identifier))
f0454224 473 throw PARSE_ERROR (_("expected identifier"), t);
47d349b1 474 string name = t->content;
534aad8b
SM
475
476 // check for redefinition of existing macro
477 if (pp1_namespace.find(name) != pp1_namespace.end())
78ab2de3 478 {
f0454224 479 parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
78ab2de3
SM
480
481 // Also point to pp1_namespace[name]->tok, the site of
482 // the original definition:
f0454224 483 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
78ab2de3
SM
484 name.c_str()), pp1_namespace[name]->tok);
485 throw er;
486 }
487
488 // XXX: the above restriction was mostly necessary due to
489 // wanting to leave open the possibility of
490 // statically-scoped semantics in the future.
26718dbe
SM
491
492 // XXX: this cascades into further parse errors as the
493 // parser tries to parse the remaining definition... (e.g.
494 // it can't tell that the macro body isn't a conditional,
495 // that the uses of parameters aren't nonexistent
496 // macros.....)
534aad8b 497 if (name == "define")
f0454224 498 throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
17f56d2a 499 if (input.atwords.count(name))
534aad8b
SM
500 session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
501
fe410f52
SM
502 macrodecl* decl = (pp1_namespace[name] = new macrodecl);
503 decl->tok = t;
534aad8b
SM
504
505 // determine if the macro takes parameters
4ac28d7e 506 bool saw_params = false;
3932c705
SM
507 t = input.scan();
508 if (t && t->type == tok_operator && t->content == "(")
4ac28d7e
SM
509 {
510 saw_params = true;
511 do
512 {
513 delete t;
514
515 t = input.scan ();
516 if (! (t && t->type == tok_identifier))
f0454224 517 throw PARSE_ERROR(_("expected identifier"), t);
47d349b1 518 decl->formal_args.push_back(t->content);
4ac28d7e
SM
519 delete t;
520
521 t = input.scan ();
522 if (t && t->type == tok_operator && t->content == ",")
523 {
524 continue;
525 }
526 else if (t && t->type == tok_operator && t->content == ")")
527 {
528 delete t;
529 t = input.scan();
530 break;
531 }
532 else
533 {
f0454224 534 throw PARSE_ERROR (_("expected ',' or ')'"), t);
4ac28d7e
SM
535 }
536 }
537 while (true);
538 }
534aad8b
SM
539
540 // (2) identify & consume macro body
3932c705 541 if (! (t && t->type == tok_operator && t->content == "%("))
4ac28d7e
SM
542 {
543 if (saw_params)
f0454224 544 throw PARSE_ERROR (_("expected '%('"), t);
4ac28d7e 545 else
f0454224 546 throw PARSE_ERROR (_("expected '%(' or '('"), t);
4ac28d7e 547 }
3932c705 548 delete t;
534aad8b 549
3932c705
SM
550 t = slurp_pp1_body (decl->body);
551 if (!t)
f0454224 552 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
3932c705 553 delete t;
534aad8b
SM
554
555 // Now loop around to look for a real token.
556 continue;
557 }
558
559 // (potential) macro invocation
560 if (t->type == tok_operator && t->content[0] == '@')
561 {
7371cd19 562 const string& name = t->content.substr(1); // strip initial '@'
534aad8b
SM
563
564 // check if name refers to a real parameter or macro
fe410f52 565 macrodecl* decl;
534aad8b
SM
566 pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
567 if (act && act->params.find(name) != act->params.end())
568 decl = act->params[name];
fe410f52
SM
569 else if (!(act && act->curr_macro->context == ctx_library)
570 && pp1_namespace.find(name) != pp1_namespace.end())
534aad8b 571 decl = pp1_namespace[name];
fe410f52
SM
572 else if (session.library_macros.find(name)
573 != session.library_macros.end())
574 decl = session.library_macros[name];
534aad8b
SM
575 else // this is an ordinary @operator
576 return t;
577
bdf7707b
JS
578 // handle macro invocation, taking ownership of t
579 pp1_activation *new_act = new pp1_activation(t, decl);
fe410f52 580 unsigned num_params = decl->formal_args.size();
534aad8b
SM
581
582 // (1a) restore parameter invocation closure
fe410f52 583 if (num_params == 0 && decl->is_closure())
534aad8b
SM
584 {
585 // NB: decl->parent_act is always safe since the
586 // parameter decl (if any) comes from an activation
587 // record which deeper in the stack than new_act.
588
589 // decl is a macro parameter which must be evaluated in
590 // the context of the original point of invocation:
fe410f52 591 new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
534aad8b
SM
592 goto expand;
593 }
594
595 // (1b) consume macro parameters (if any)
596 if (num_params == 0)
597 goto expand;
598
599 // for simplicity, we do not allow macro constructs here
600 // -- if we did, we'd have to recursively call scan_pp1()
601 t = next_pp1 ();
39566df2 602 if (! (t && t->type == tok_operator && t->content == "("))
534aad8b
SM
603 {
604 delete new_act;
f0454224 605 throw PARSE_ERROR (_NF
534aad8b 606 ("expected '(' in invocation of macro '@%s'"
f499dee5 607 " taking %d parameter",
534aad8b 608 "expected '(' in invocation of macro '@%s'"
f499dee5 609 " taking %d parameters",
52c2652f 610 num_params, name.c_str(), num_params), t);
534aad8b
SM
611 }
612
613 // XXX perhaps parse/count the full number of params,
614 // so we can say "expected x, found y params" on error?
615 for (unsigned i = 0; i < num_params; i++)
616 {
617 delete t;
618
619 // create parameter closure
fe410f52 620 string param_name = decl->formal_args[i];
534aad8b 621 pp_macrodecl* p = (new_act->params[param_name]
fe410f52
SM
622 = new pp_macrodecl);
623 p->tok = new token(*new_act->tok);
624 p->parent_act = act;
534aad8b
SM
625 // NB: *new_act->tok points to invocation, act is NULL at top level
626
627 t = slurp_pp1_param (p->body);
628
629 // check correct usage of ',' or ')'
630 if (t == 0) // hit unexpected EOF or end of macro
631 {
632 // XXX could we pop the stack and continue parsing
633 // the invocation, allowing macros to construct new
634 // invocations in piecemeal fashion??
635 const token* orig_t = new token(*new_act->tok);
636 delete new_act;
f0454224 637 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
534aad8b
SM
638 }
639 if (t->type == tok_operator && t->content == ",")
640 {
641 if (i + 1 == num_params)
642 {
643 delete new_act;
f0454224 644 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
534aad8b
SM
645 }
646 }
647 else if (t->type == tok_operator && t->content == ")")
648 {
649 if (i + 1 != num_params)
650 {
651 delete new_act;
f0454224 652 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
534aad8b
SM
653 }
654 }
655 else
656 {
657 // XXX this is, incidentally, impossible
658 delete new_act;
f0454224 659 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
534aad8b
SM
660 }
661 }
662
663 delete t;
664
665 // (2) set up macro expansion
666 expand:
667 pp1_state.push_back (new_act);
668
669 // Now loop around to look for a real token.
670 continue;
671 }
672
673 // Otherwise, we have an ordinary token.
674 return t;
675 }
676}
677
36fa6eb3 678// Consume a single macro invocation's parameters, heeding nesting
534aad8b
SM
679// brackets and stopping on an unbalanced ')' or an unbracketed ','
680// (and returning the final separator token).
681const token*
682parser::slurp_pp1_param (vector<const token*>& param)
683{
684 const token* t = 0;
685 unsigned nesting = 0;
686 do
687 {
688 t = next_pp1 ();
689
690 if (!t)
691 break;
36fa6eb3
FCE
692 // [ needed in case macro paramater is used as prefix for array-deref operation
693 if (t->type == tok_operator && (t->content == "(" || t->content == "["))
534aad8b 694 ++nesting;
36fa6eb3 695 else if (nesting && t->type == tok_operator && (t->content == ")" || t->content == "]"))
534aad8b
SM
696 --nesting;
697 else if (!nesting && t->type == tok_operator
698 && (t->content == ")" || t->content == ","))
699 break;
700 param.push_back(t);
701 }
702 while (true);
703 return t; // report ")" or "," or NULL
704}
705
706
707// Consume a macro declaration's body, heeding nested %( %) brackets.
708const token*
709parser::slurp_pp1_body (vector<const token*>& body)
710{
711 const token* t = 0;
712 unsigned nesting = 0;
713 do
714 {
715 t = next_pp1 ();
716
717 if (!t)
718 break;
719 if (t->type == tok_operator && t->content == "%(")
720 ++nesting;
721 else if (nesting && t->type == tok_operator && t->content == "%)")
722 --nesting;
723 else if (!nesting && t->type == tok_operator && t->content == "%)")
724 break;
725 body.push_back(t);
726 }
727 while (true);
728 return t; // report final "%)" or NULL
729}
730
fe410f52
SM
731// Used for parsing .stpm files.
732stapfile*
f8405ea5 733parser::parse_library_macros ()
fe410f52
SM
734{
735 stapfile* f = new stapfile;
15f4ba98 736 f->privileged = this->privileged;
fe410f52
SM
737 input.set_current_file (f);
738
739 try
740 {
ed891cf3 741 const token* t = scan_pp ();
fe410f52
SM
742
743 // Currently we only take objection to macro invocations if they
744 // produce a non-whitespace token after being expanded.
745
746 // XXX should we prevent macro invocations even if they expand to empty??
747
748 if (t != 0)
ed891cf3 749 throw PARSE_ERROR (_F("unexpected token in library macro file '%s'", input_name.c_str()), t);
fe410f52
SM
750
751 // We need to first check whether *any* of the macros are duplicates,
752 // then commit to including the entire file in the global namespace
753 // (or not). Yuck.
754 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
755 it != pp1_namespace.end(); it++)
756 {
757 string name = it->first;
758
759 if (session.library_macros.find(name) != session.library_macros.end())
760 {
f0454224
JL
761 parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
762 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
78ab2de3 763 print_error (er);
fe410f52 764
78ab2de3 765 delete er.chain;
fe410f52
SM
766 delete f;
767 return 0;
768 }
769 }
770
771 }
772 catch (const parse_error& pe)
773 {
7ac01ea0 774 print_error (pe, errs_as_warnings);
fe410f52
SM
775 delete f;
776 return 0;
777 }
778
779 // If no errors, include the entire file. Note how this is outside
780 // of the try-catch block -- no errors possible.
781 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
782 it != pp1_namespace.end(); it++)
783 {
784 string name = it->first;
785
786 session.library_macros[name] = it->second;
787 session.library_macros[name]->context = ctx_library;
fe410f52
SM
788 }
789
790 return f;
791}
792
534aad8b
SM
793// Second pass - preprocessor conditional expansion.
794//
177a8ead 795// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
796// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
797// or: arch COMPARISON-OP "arch-string"
db135493 798// or: systemtap_v COMPARISON-OP "version-string"
2e6dd9d0 799// or: systemtap_privilege COMPARISON-OP "privilege-string"
561079c8 800// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 801// or: CONFIG_foo COMPARISON-OP number
4227f98d 802// or: CONFIG_foo COMPARISON-OP CONFIG_bar
5811366a
FCE
803// or: "string1" COMPARISON-OP "string2"
804// or: number1 COMPARISON-OP number2
44ce8ed5 805// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
806//
807// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 808// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 809// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
177a8ead
FCE
810//
811// Up to an entire %( ... %) expression is processed by a single call
812// to this function. Tokens included by any nested conditions are
813// enqueued in a private vector.
814
815bool eval_pp_conditional (systemtap_session& s,
816 const token* l, const token* op, const token* r)
817{
44ce8ed5 818 if (l->type == tok_identifier && (l->content == "kernel_v" ||
db135493
FCE
819 l->content == "kernel_vr" ||
820 l->content == "systemtap_v"))
44ce8ed5 821 {
db135493 822 if (! (r->type == tok_string))
f0454224 823 throw PARSE_ERROR (_("expected string literal"), r);
db135493 824
44ce8ed5 825 string target_kernel_vr = s.kernel_release;
197a4d62 826 string target_kernel_v = s.kernel_base_release;
db135493 827 string target;
dff50e09 828
db135493
FCE
829 if (l->content == "kernel_v") target = target_kernel_v;
830 else if (l->content == "kernel_vr") target = target_kernel_vr;
831 else if (l->content == "systemtap_v") target = s.compatible;
832 else assert (0);
7a468d68 833
47d349b1 834 string query = r->content;
7a468d68
FCE
835 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
836
44ce8ed5
FCE
837 // collect acceptable strverscmp results.
838 int rvc_ok1, rvc_ok2;
7a468d68 839 bool wc_ok = false;
44ce8ed5
FCE
840 if (op->type == tok_operator && op->content == "<=")
841 { rvc_ok1 = -1; rvc_ok2 = 0; }
842 else if (op->type == tok_operator && op->content == ">=")
843 { rvc_ok1 = 1; rvc_ok2 = 0; }
844 else if (op->type == tok_operator && op->content == "<")
845 { rvc_ok1 = -1; rvc_ok2 = -1; }
846 else if (op->type == tok_operator && op->content == ">")
847 { rvc_ok1 = 1; rvc_ok2 = 1; }
848 else if (op->type == tok_operator && op->content == "==")
7a468d68 849 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 850 else if (op->type == tok_operator && op->content == "!=")
7a468d68 851 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5 852 else
f0454224 853 throw PARSE_ERROR (_("expected comparison operator"), op);
7a468d68
FCE
854
855 if ((!wc_ok) && rhs_wildcard)
f0454224 856 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
7a468d68
FCE
857
858 if (rhs_wildcard)
859 {
860 int rvc_result = fnmatch (query.c_str(), target.c_str(),
861 FNM_NOESCAPE); // spooky
862 bool badness = (rvc_result == 0) ^ (op->content == "==");
863 return !badness;
864 }
865 else
866 {
867 int rvc_result = strverscmp (target.c_str(), query.c_str());
868 // normalize rvc_result
869 if (rvc_result < 0) rvc_result = -1;
870 if (rvc_result > 0) rvc_result = 1;
871 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
872 }
44ce8ed5 873 }
2e6dd9d0
SM
874 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
875 {
876 string target_privilege =
cba5b802
SM
877 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
878 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
879 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
2e6dd9d0
SM
880 : "none"; /* should be impossible -- s.privilege always one of above */
881 assert(target_privilege != "none");
882
883 if (! (r->type == tok_string))
f0454224 884 throw PARSE_ERROR (_("expected string literal"), r);
47d349b1 885 string query_privilege = r->content;
2e6dd9d0
SM
886
887 bool nomatch = (target_privilege != query_privilege);
888
889 bool result;
890 if (op->type == tok_operator && op->content == "==")
891 result = !nomatch;
892 else if (op->type == tok_operator && op->content == "!=")
893 result = nomatch;
894 else
f0454224 895 throw PARSE_ERROR (_("expected '==' or '!='"), op);
cba5b802 896 /* XXX perhaps allow <= >= and similar comparisons */
2e6dd9d0
SM
897
898 return result;
899 }
92585d32
PK
900 else if (l->type == tok_identifier && l->content == "guru_mode")
901 {
902 if (! (r->type == tok_number))
903 throw PARSE_ERROR (_("expected number"), r);
904 int64_t lhs = (int64_t) s.guru_mode;
47d349b1 905 int64_t rhs = lex_cast<int64_t>(r->content);
92585d32
PK
906 if (!((rhs == 0)||(rhs == 1)))
907 throw PARSE_ERROR (_("expected 0 or 1"), op);
908 if (!((op->type == tok_operator && op->content == "==") ||
909 (op->type == tok_operator && op->content == "!=")))
910 throw PARSE_ERROR (_("expected '==' or '!='"), op);
911
912 return eval_comparison (lhs, op, rhs);
913 }
44ce8ed5
FCE
914 else if (l->type == tok_identifier && l->content == "arch")
915 {
916 string target_architecture = s.architecture;
917 if (! (r->type == tok_string))
f0454224 918 throw PARSE_ERROR (_("expected string literal"), r);
47d349b1 919 string query_architecture = r->content;
dff50e09 920
7a468d68
FCE
921 int nomatch = fnmatch (query_architecture.c_str(),
922 target_architecture.c_str(),
923 FNM_NOESCAPE); // still spooky
924
561079c8
FCE
925 bool result;
926 if (op->type == tok_operator && op->content == "==")
927 result = !nomatch;
928 else if (op->type == tok_operator && op->content == "!=")
929 result = nomatch;
930 else
f0454224 931 throw PARSE_ERROR (_("expected '==' or '!='"), op);
561079c8 932
d9677d7b
DS
933 return result;
934 }
935 else if (l->type == tok_identifier && l->content == "runtime")
936 {
937 if (! (r->type == tok_string))
f0454224 938 throw PARSE_ERROR (_("expected string literal"), r);
d9677d7b 939
47d349b1 940 string query_runtime = r->content;
d9677d7b
DS
941 string target_runtime;
942
cf8d5f1d
AM
943 if (s.runtime_mode == systemtap_session::dyninst_runtime)
944 target_runtime = "dyninst";
945 else if (s.runtime_mode == systemtap_session::bpf_runtime)
946 target_runtime = "bpf";
947 else
948 target_runtime = "kernel";
949
d9677d7b
DS
950 int nomatch = fnmatch (query_runtime.c_str(),
951 target_runtime.c_str(),
952 FNM_NOESCAPE); // still spooky
953
954 bool result;
955 if (op->type == tok_operator && op->content == "==")
956 result = !nomatch;
957 else if (op->type == tok_operator && op->content == "!=")
958 result = nomatch;
959 else
f0454224 960 throw PARSE_ERROR (_("expected '==' or '!='"), op);
d9677d7b 961
561079c8
FCE
962 return result;
963 }
f5a34c5a 964 else if (l->type == tok_identifier && l->content.starts_with("CONFIG_"))
561079c8 965 {
717a457b
MW
966 if (r->type == tok_string)
967 {
47d349b1
FCE
968 string lhs = s.kernel_config[l->content]; // may be empty
969 string rhs = r->content;
561079c8 970
717a457b 971 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 972
717a457b
MW
973 bool result;
974 if (op->type == tok_operator && op->content == "==")
975 result = !nomatch;
976 else if (op->type == tok_operator && op->content == "!=")
977 result = nomatch;
978 else
f0454224 979 throw PARSE_ERROR (_("expected '==' or '!='"), op);
dff50e09 980
717a457b
MW
981 return result;
982 }
983 else if (r->type == tok_number)
984 {
7371cd19
JS
985 const string& lhs_string = s.kernel_config[l->content];
986 const char* startp = lhs_string.c_str ();
717a457b
MW
987 char* endp = (char*) startp;
988 errno = 0;
989 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
990 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
f0454224 991 throw PARSE_ERROR ("Config option value not a number", l);
717a457b 992
47d349b1 993 int64_t rhs = lex_cast<int64_t>(r->content);
717a457b
MW
994 return eval_comparison (lhs, op, rhs);
995 }
4227f98d 996 else if (r->type == tok_identifier
f5a34c5a 997 && r->content.starts_with( "CONFIG_"))
4227f98d
MW
998 {
999 // First try to convert both to numbers,
1000 // otherwise threat both as strings.
7371cd19
JS
1001 const string& lhs_string = s.kernel_config[l->content];
1002 const string& rhs_string = s.kernel_config[r->content];
1003 const char* startp = lhs_string.c_str ();
4227f98d
MW
1004 char* endp = (char*) startp;
1005 errno = 0;
1006 int64_t val = (int64_t) strtoll (startp, & endp, 0);
1007 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1008 {
1009 int64_t lhs = val;
7371cd19 1010 startp = rhs_string.c_str ();
4227f98d
MW
1011 endp = (char*) startp;
1012 errno = 0;
1013 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
1014 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
1015 return eval_comparison (lhs, op, rhs);
1016 }
1017
7371cd19 1018 return eval_comparison (lhs_string, op, rhs_string);
4227f98d 1019 }
717a457b 1020 else
f0454224 1021 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
dff50e09 1022 }
c434ec7e 1023 else if (l->type == tok_string && r->type == tok_string)
5811366a 1024 {
47d349b1
FCE
1025 string lhs = l->content;
1026 string rhs = r->content;
c434ec7e
FCE
1027 return eval_comparison (lhs, op, rhs);
1028 // NB: no wildcarding option here
1029 }
1030 else if (l->type == tok_number && r->type == tok_number)
1031 {
47d349b1
FCE
1032 int64_t lhs = lex_cast<int64_t>(l->content);
1033 int64_t rhs = lex_cast<int64_t>(r->content);
c434ec7e 1034 return eval_comparison (lhs, op, rhs);
7a468d68 1035 // NB: no wildcarding option here
5811366a
FCE
1036 }
1037 else if (l->type == tok_string && r->type == tok_number
1038 && op->type == tok_operator)
f0454224 1039 throw PARSE_ERROR (_("expected string literal as right value"), r);
5811366a
FCE
1040 else if (l->type == tok_number && r->type == tok_string
1041 && op->type == tok_operator)
f0454224 1042 throw PARSE_ERROR (_("expected number literal as right value"), r);
c434ec7e 1043
177a8ead 1044 else
f0454224 1045 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
d9677d7b
DS
1046 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1047 " comparison between strings or integers"), l);
177a8ead
FCE
1048}
1049
1050
5811366a 1051// Only tokens corresponding to the TRUE statement must be expanded
177a8ead 1052const token*
b5477cd9 1053parser::scan_pp ()
177a8ead
FCE
1054{
1055 while (true)
1056 {
e92f2566
JS
1057 pp_state_t pp = PP_NONE;
1058 if (!pp_state.empty())
1059 pp = pp_state.back().second;
1060
1061 const token* t = 0;
1062 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1063 t = skip_pp ();
1064 else
534aad8b 1065 t = scan_pp1 ();
e92f2566
JS
1066
1067 if (t == 0) // EOF
177a8ead 1068 {
e92f2566
JS
1069 if (pp != PP_NONE)
1070 {
1071 t = pp_state.back().first;
1072 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
ce0f6648 1073 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
f0454224 1074 throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
e92f2566 1075 }
177a8ead
FCE
1076 return t;
1077 }
1078
e92f2566
JS
1079 // misplaced preprocessor "then"
1080 if (t->type == tok_operator && t->content == "%?")
f0454224 1081 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566
JS
1082
1083 // preprocessor "else"
1084 if (t->type == tok_operator && t->content == "%:")
1085 {
1086 if (pp == PP_NONE)
f0454224 1087 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566 1088 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
f0454224 1089 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1d94e4e5
SM
1090 // XXX: here and elsewhere, error cascades might be avoided
1091 // by dropping tokens until we reach the closing %)
e92f2566
JS
1092
1093 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1094 PP_SKIP_ELSE : PP_KEEP_ELSE;
1095 delete t;
1096 continue;
1097 }
1098
1099 // preprocessor close
1100 if (t->type == tok_operator && t->content == "%)")
1101 {
1102 if (pp == PP_NONE)
f0454224 1103 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566 1104 delete pp_state.back().first;
a07a2c28 1105 delete t; //this is the closing bracket
e92f2566
JS
1106 pp_state.pop_back();
1107 continue;
1108 }
dff50e09 1109
177a8ead
FCE
1110 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1111 return t;
1112
1113 // We have a %( - it's time to throw a preprocessing party!
1114
2d7881bf
PP
1115 bool result = false;
1116 bool and_result = true;
1117 const token *n = NULL;
1118 do {
1119 const token *l, *op, *r;
534aad8b
SM
1120 l = scan_pp1 ();
1121 op = scan_pp1 ();
1122 r = scan_pp1 ();
2d7881bf 1123 if (l == 0 || op == 0 || r == 0)
f0454224 1124 throw PARSE_ERROR (_("incomplete condition after '%('"), t);
2d7881bf
PP
1125 // NB: consider generalizing to consume all tokens until %?, and
1126 // passing that as a vector to an evaluator.
1127
1128 // Do not evaluate the condition if we haven't expanded everything.
1129 // This may occur when having several recursive conditionals.
1130 and_result &= eval_pp_conditional (session, l, op, r);
a07a2c28
LB
1131 if(l->content=="systemtap_v")
1132 systemtap_v_seen=r;
1133
1134 else
1135 delete r;
1136
2d7881bf
PP
1137 delete l;
1138 delete op;
2d7881bf
PP
1139 delete n;
1140
534aad8b 1141 n = scan_pp1 ();
2d7881bf
PP
1142 if (n && n->type == tok_operator && n->content == "&&")
1143 continue;
1144 result |= and_result;
1145 and_result = true;
1146 if (! (n && n->type == tok_operator && n->content == "||"))
1147 break;
1148 } while (true);
3f847830
FCE
1149
1150 /*
1151 clog << "PP eval (" << *t << ") == " << result << endl;
1152 */
1153
e92f2566 1154 const token *m = n;
177a8ead 1155 if (! (m && m->type == tok_operator && m->content == "%?"))
f0454224 1156 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
70c743d8 1157 delete m; // "%?"
177a8ead 1158
e92f2566
JS
1159 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1160 pp_state.push_back (make_pair (t, pp));
3f847830 1161
e92f2566
JS
1162 // Now loop around to look for a real token.
1163 }
1164}
3f847830 1165
3f847830 1166
e92f2566
JS
1167// Skip over tokens and any errors, heeding
1168// only nested preprocessor starts and ends.
1169const token*
1170parser::skip_pp ()
1171{
1172 const token* t = 0;
1173 unsigned nesting = 0;
1174 do
1175 {
1176 try
1177 {
ed891cf3 1178 t = scan_pp1 (true);
177a8ead 1179 }
e92f2566 1180 catch (const parse_error &e)
70c743d8 1181 {
e92f2566 1182 continue;
70c743d8 1183 }
e92f2566
JS
1184 if (!t)
1185 break;
1186 if (t->type == tok_operator && t->content == "%(")
1187 ++nesting;
1188 else if (nesting && t->type == tok_operator && t->content == "%)")
1189 --nesting;
1190 else if (!nesting && t->type == tok_operator &&
1191 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1192 break;
1193 delete t;
177a8ead 1194 }
e92f2566
JS
1195 while (true);
1196 return t;
177a8ead
FCE
1197}
1198
1199
2f1a1aea 1200const token*
b5477cd9 1201parser::next ()
2f1a1aea
FCE
1202{
1203 if (! next_t)
b5477cd9 1204 next_t = scan_pp ();
2f1a1aea 1205 if (! next_t)
f0454224 1206 throw PARSE_ERROR (_("unexpected end-of-file"));
2f1a1aea 1207
2f1a1aea
FCE
1208 last_t = next_t;
1209 // advance by zeroing next_t
1210 next_t = 0;
1211 return last_t;
1212}
1213
1214
1215const token*
b5477cd9 1216parser::peek ()
2f1a1aea
FCE
1217{
1218 if (! next_t)
b5477cd9 1219 next_t = scan_pp ();
2f1a1aea
FCE
1220
1221 // don't advance by zeroing next_t
1222 last_t = next_t;
1223 return next_t;
1224}
1225
1226
731a5359
MW
1227void
1228parser::swallow ()
1229{
1230 // can only swallow something last peeked or nexted token.
1231 assert (last_t != 0);
1232 delete last_t;
1233 // advance by zeroing next_t
1234 last_t = next_t = 0;
1235}
1236
1237
d7f3e0c5
GH
1238static inline bool
1239tok_is(token const * t, token_type tt, string const & expected)
1240{
1241 return t && t->type == tt && t->content == expected;
1242}
1243
1244
731a5359 1245void
d7f3e0c5
GH
1246parser::expect_known (token_type tt, string const & expected)
1247{
1248 const token *t = next();
57b73400 1249 if (! (t && t->type == tt && t->content == expected))
f0454224 1250 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
731a5359 1251 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1252}
1253
1254
a3e980f9 1255void
47d349b1 1256parser::expect_unknown (token_type tt, interned_string & target)
a3e980f9
FCE
1257{
1258 const token *t = next();
1259 if (!(t && t->type == tt))
1260 throw PARSE_ERROR (_("expected ") + tt2str(tt));
1261 target = t->content;
1262 swallow (); // We are done with it, content was copied.
1263}
1264
d7f3e0c5 1265
a3e980f9 1266void
47d349b1 1267parser::expect_unknown2 (token_type tt1, token_type tt2, interned_string & target)
a3e980f9
FCE
1268{
1269 const token *t = next();
1270 if (!(t && (t->type == tt1 || t->type == tt2)))
1271 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
1272 target = t->content;
1273 swallow (); // We are done with it, content was copied.
1274}
1275
493ee224 1276
731a5359 1277void
aa389a19 1278parser::expect_op (string const & expected)
d7f3e0c5 1279{
731a5359 1280 expect_known (tok_operator, expected);
d7f3e0c5
GH
1281}
1282
1efdc9a9 1283interned_string
891b96e6 1284parser::expect_op_any (initializer_list<const char*> expected)
1efdc9a9
FL
1285{
1286 const token *t = next();
891b96e6
JS
1287 if (t && t->type == tok_operator)
1288 for (auto it = expected.begin(); it != expected.end(); ++it)
1289 if (t->content == *it)
1efdc9a9 1290 {
891b96e6
JS
1291 interned_string found = t->content;
1292 swallow (); // We are done with it, content was copied.
1293 return found;
1efdc9a9 1294 }
891b96e6
JS
1295
1296 string msg;
1297 for (auto it = expected.begin(); it != expected.end(); ++it)
1298 {
1299 if (it != expected.begin())
1efdc9a9
FL
1300 msg.append(" ");
1301 msg.append(*it);
1302 }
891b96e6 1303 throw PARSE_ERROR (_F("expected one of '%s'", msg.c_str()));
1efdc9a9 1304}
d7f3e0c5 1305
731a5359 1306void
aa389a19 1307parser::expect_kw (string const & expected)
d7f3e0c5 1308{
731a5359 1309 expect_known (tok_keyword, expected);
d7f3e0c5
GH
1310}
1311
dff50e09 1312const token*
aa389a19 1313parser::expect_kw_token (string const & expected)
731a5359
MW
1314{
1315 const token *t = next();
1316 if (! (t && t->type == tok_keyword && t->content == expected))
f0454224 1317 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
731a5359
MW
1318 return t;
1319}
1320
1321void
e38723d2 1322parser::expect_number (int64_t & value)
57b73400 1323{
e38723d2
MH
1324 bool neg = false;
1325 const token *t = next();
1326 if (t->type == tok_operator && t->content == "-")
1327 {
1328 neg = true;
731a5359 1329 swallow ();
e38723d2
MH
1330 t = next ();
1331 }
1332 if (!(t && t->type == tok_number))
f0454224 1333 throw PARSE_ERROR (_("expected number"));
e38723d2 1334
7371cd19
JS
1335 const string& s = t->content;
1336 const char* startp = s.c_str ();
e38723d2
MH
1337 char* endp = (char*) startp;
1338
1339 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1340 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1341 // since the lexer only gives us positive digit strings, but we'll
1342 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1343 errno = 0;
1344 value = (int64_t) strtoull (startp, & endp, 0);
1345 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1346 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1347 || (unsigned long long) value > 18446744073709551615ULL
1348 || value < -9223372036854775807LL-1)
f0454224 1349 throw PARSE_ERROR (_("number invalid or out of range"));
dff50e09 1350
e38723d2
MH
1351 if (neg)
1352 value = -value;
1353
731a5359 1354 swallow (); // We are done with it, content was parsed and copied into value.
57b73400
GH
1355}
1356
d7f3e0c5 1357
dff50e09 1358const token*
b1f2b0e8 1359parser::expect_ident_or_atword (interned_string & target)
d7f3e0c5 1360{
06219d6f
SM
1361 const token *t = next();
1362
1363 // accept identifiers and operators beginning in '@':
1364 if (!t || (t->type != tok_identifier
1365 && (t->type != tok_operator || t->content[0] != '@')))
1366 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1367 // so the message is accurate, but keep an eye out in the future:
f0454224 1368 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
06219d6f 1369
47d349b1 1370 target = t->content;
06219d6f 1371 return t;
d7f3e0c5
GH
1372}
1373
1374
731a5359 1375void
b1f2b0e8 1376parser::expect_ident_or_keyword (interned_string & target)
493ee224 1377{
731a5359 1378 expect_unknown2 (tok_identifier, tok_keyword, target);
493ee224
DS
1379}
1380
1381
dff50e09 1382bool
aa389a19 1383parser::peek_op (string const & op)
d7f3e0c5
GH
1384{
1385 return tok_is (peek(), tok_operator, op);
1386}
1387
1388
dff50e09 1389bool
aa389a19 1390parser::peek_kw (string const & kw)
d7f3e0c5
GH
1391{
1392 return tok_is (peek(), tok_identifier, kw);
1393}
1394
1395
1396
8518e54f
FCE
1397lexer::lexer (istream& input, const string& in, systemtap_session& s, bool cc):
1398 ate_comment(false), ate_whitespace(false), saw_tokens(false), check_compatible(cc),
aa389a19
FCE
1399 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1400 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
1401 cursor_column (1), session(s), current_file (0), current_token_chain (0)
eacb10ce 1402{
aa389a19 1403 getline(input, input_contents, '\0');
73ddea86 1404
4915b3e4
AJ
1405 input_pointer = input_contents.data();
1406 input_end = input_contents.data() + input_contents.size();
66c7d4c1
JS
1407
1408 if (keywords.empty())
1409 {
3a7ec735
FCE
1410 // NB: adding new keywords is highly disruptive to the language,
1411 // in particular to existing scripts that could be suddenly
1412 // broken. If done at all, it has to be s.compatible-sensitive,
1413 // and broadly advertised.
66c7d4c1
JS
1414 keywords.insert("probe");
1415 keywords.insert("global");
38bf68a8
MC
1416 if (has_version("3.0"))
1417 keywords.insert("private");
66c7d4c1
JS
1418 keywords.insert("function");
1419 keywords.insert("if");
1420 keywords.insert("else");
1421 keywords.insert("for");
1422 keywords.insert("foreach");
1423 keywords.insert("in");
1424 keywords.insert("limit");
1425 keywords.insert("return");
1426 keywords.insert("delete");
1427 keywords.insert("while");
1428 keywords.insert("break");
1429 keywords.insert("continue");
1430 keywords.insert("next");
1431 keywords.insert("string");
1432 keywords.insert("long");
f4fe2e93
FCE
1433 keywords.insert("try");
1434 keywords.insert("catch");
66c7d4c1 1435 }
2524d1fd
SM
1436
1437 if (atwords.empty())
1438 {
1439 // NB: adding new @words is mildly disruptive to existing
1440 // scripts that define macros with the same name, but not
1441 // really. The user will merely receive a warning that they are
1442 // redefining an existing operator.
17f56d2a
JS
1443
1444 // These are inserted without the actual '@', so we can directly check
1445 // proposed macro names without building a string with that prefix.
1446 atwords.insert("cast");
1447 atwords.insert("defined");
1448 atwords.insert("entry");
1449 atwords.insert("perf");
1450 atwords.insert("var");
1451 atwords.insert("avg");
1452 atwords.insert("count");
1453 atwords.insert("sum");
1454 atwords.insert("min");
1455 atwords.insert("max");
1456 atwords.insert("hist_linear");
1457 atwords.insert("hist_log");
63ead7fa
MC
1458 if (has_version("3.1"))
1459 {
1460 atwords.insert("const");
1461 atwords.insert("variance");
1462 }
4c2e691d
JU
1463 if (has_version("4.0"))
1464 {
1465 atwords.insert("kregister");
1466 atwords.insert("uregister");
1467 atwords.insert("kderef");
1468 atwords.insert("uderef");
1469 }
2524d1fd 1470 }
eacb10ce 1471}
2f1a1aea 1472
// Names of the built-in @-operators, stored without the leading '@'.
// Filled in by the lexer constructor whenever it finds the set empty.
85c97fc2 1473unordered_set<string> lexer::atwords;
66c7d4c1 1474
1b1b4ceb
RA
1475void
1476lexer::set_current_file (stapfile* f)
1477{
1478 current_file = f;
2203b032
JS
1479 if (f)
1480 {
47d349b1 1481 f->file_contents = input_contents;
2203b032
JS
1482 f->name = input_name;
1483 }
1b1b4ceb 1484}
bb2e3076 1485
101b0805
JS
1486void
1487lexer::set_current_token_chain (const token* tok)
1488{
1489 current_token_chain = tok;
1490}
1491
bb2e3076
FCE
1492int
1493lexer::input_peek (unsigned n)
1494{
aa389a19
FCE
1495 if (input_pointer + n >= input_end)
1496 return -1; // EOF
1497 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
1498}
1499
1500
f8405ea5
JS
1501bool
1502lexer::has_version (const char* v) const
1503{
1504 return check_compatible
1505 ? strverscmp(session.compatible.c_str(), v) >= 0
1506 : true;
1507}
1508
dff50e09 1509int
2f1a1aea
FCE
1510lexer::input_get ()
1511{
66c7d4c1 1512 int c = input_peek();
bb2e3076
FCE
1513 if (c < 0) return c; // EOF
1514
aa389a19 1515 ++input_pointer;
66c7d4c1 1516
aa389a19
FCE
1517 if (cursor_suspend_count)
1518 {
1519 // Track effect of input_put: preserve previous cursor/line_column
1520 // until all of its characters are consumed.
1521 if (--cursor_suspend_count == 0)
1522 {
1523 cursor_line = cursor_suspend_line;
1524 cursor_column = cursor_suspend_column;
1525 }
1526 }
3f99432c 1527 else
2f1a1aea 1528 {
3f99432c
FCE
1529 // update source cursor
1530 if (c == '\n')
1531 {
1532 cursor_line ++;
1533 cursor_column = 1;
1534 }
1535 else
1536 cursor_column ++;
2f1a1aea 1537 }
2f1a1aea 1538
eacb10ce 1539 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
1540 return c;
1541}
1542
1543
3f99432c 1544void
aa389a19 1545lexer::input_put (const string& chars, const token* t)
3f99432c 1546{
aa389a19
FCE
1547 size_t pos = input_pointer - input_contents.data();
1548 // clog << "[put:" << chars << " @" << pos << "]";
1549 input_contents.insert (pos, chars);
1550 cursor_suspend_count += chars.size();
1551 cursor_suspend_line = cursor_line;
1552 cursor_suspend_column = cursor_column;
1553 cursor_line = t->location.line;
1554 cursor_column = t->location.column;
1555 input_pointer = input_contents.data() + pos;
1556 input_end = input_contents.data() + input_contents.size();
46b5cfb2
AJ
1557}
1558
3f99432c 1559
// Scan the next token from the input buffer.
//
// Returns a token allocated with new, or 0 at end of input (in which
// case saw_tokens is restored to its previous value).  Whitespace and
// comments are skipped and noted in ate_whitespace / ate_comment.
// Command-line argument references ($N, @N, $#, @#) are pasted back
// into the input via input_put() and scanning restarts.  Lexical
// problems are reported by returning a token turned into junk with
// make_junk().
2f1a1aea 1560token*
b5477cd9 1561lexer::scan ()
2f1a1aea 1562{
fee28e5c 1563 ate_comment = false; // reset for each new token
b5477cd9 1564 ate_whitespace = false; // reset for each new token
534aad8b
SM
1565
1566 // XXX be very sure to restore old_saw_tokens if we return without a token:
1567 bool old_saw_tokens = saw_tokens;
1568 saw_tokens = true;
1569
2f1a1aea 1570 token* n = new token;
aa389a19 1571 string token_str; // accumulate here instead of by incremental interning
2203b032 1572 n->location.file = current_file;
101b0805 1573 n->chain = current_token_chain;
2f1a1aea 1574
// Restart point: reached again after skipping whitespace or a comment,
// and after pasting a command-line argument into the input.
9300f661 1575skip:
// 'suspended' is true while characters pasted by input_put() are still
// pending; argument references found in that state are rejected below.
aa389a19 1576 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
1577 n->location.line = cursor_line;
1578 n->location.column = cursor_column;
1579
1580 int c = input_get();
3f99432c 1581 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
1582 if (c < 0)
1583 {
1584 delete n;
534aad8b 1585 saw_tokens = old_saw_tokens;
2f1a1aea
FCE
1586 return 0;
1587 }
1588
1589 if (isspace (c))
b5477cd9
SM
1590 {
1591 ate_whitespace = true;
1592 goto skip;
1593 }
2f1a1aea 1594
// c2 is one character of look-ahead; it is not consumed yet.
66c7d4c1
JS
1595 int c2 = input_peek ();
1596
3f99432c
FCE
1597 // Paste command line arguments as character streams into
1598 // the beginning of a token. $1..$999 go through as raw
1599 // characters; @1..@999 are quoted/escaped as strings.
1600 // $# and @# expand to the number of arguments, similarly
1601 // raw or quoted.
9300f661 1602 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 1603 {
aa389a19
FCE
1604 token_str.push_back (c);
1605 token_str.push_back (c2);
3f99432c 1606 input_get(); // swallow '#'
46b5cfb2 1607
9300f661 1608 if (suspended)
16fc963f 1609 {
10e7c19d 1610 n->make_junk(tok_junk_nested_arg);
16fc963f
SM
1611 return n;
1612 }
aa389a19
FCE
1613 size_t num_args = session.args.size ();
1614 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1615 token_str.clear();
9300f661 1616 goto skip;
3f99432c 1617 }
9300f661 1618 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c
FCE
1619 {
1620 unsigned idx = 0;
aa389a19 1621 token_str.push_back (c);
3f99432c
FCE
1622 do
1623 {
1624 input_get ();
aa389a19 1625 token_str.push_back (c2);
3f99432c
FCE
1626 idx = (idx * 10) + (c2 - '0');
1627 c2 = input_peek ();
1628 } while (c2 > 0 &&
dff50e09 1629 isdigit (c2) &&
3f99432c 1630 idx <= session.args.size()); // prevent overflow
16fc963f
SM
1631 if (suspended)
1632 {
10e7c19d 1633 n->make_junk(tok_junk_nested_arg);
16fc963f
SM
1634 return n;
1635 }
3f99432c
FCE
1636 if (idx == 0 ||
1637 idx-1 >= session.args.size())
16fc963f 1638 {
10e7c19d 1639 n->make_junk(tok_junk_invalid_arg);
16fc963f
SM
1640 return n;
1641 }
8518e54f 1642 session.used_args[idx-1] = true;
aa389a19
FCE
1643 const string& arg = session.args[idx-1];
1644 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1645 token_str.clear();
9300f661 1646 goto skip;
3f99432c
FCE
1647 }
1648
// Word: classified as an operator if it starts with '@', as a keyword
// if it is in the keywords table, otherwise as an identifier.
b5477cd9 1649 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
2f1a1aea 1650 {
aa389a19 1651 token_str = (char) c;
b5477cd9 1652 while (isalnum (c2) || c2 == '_' || c2 == '$')
2f1a1aea 1653 {
3f99432c 1654 input_get ();
aa389a19 1655 token_str.push_back (c2);
3f99432c 1656 c2 = input_peek ();
6e213f58 1657 }
5775f11f 1658 n->content = token_str;
213bee8f 1659
5775f11f 1660 if (n->content[0] == '@')
dd90d565 1661 // makes it easier to detect illegal use of @words:
06219d6f 1662 n->type = tok_operator;
5775f11f
JS
1663 else if (keywords.count(n->content))
1664 n->type = tok_keyword;
1665 else
1666 n->type = tok_identifier;
dff50e09 1667
2f1a1aea
FCE
1668 return n;
1669 }
1670
3a20432b 1671 else if (isdigit (c)) // positive literal
2f1a1aea 1672 {
2f1a1aea 1673 n->type = tok_number;
aa389a19 1674 token_str = (char) c;
9c0c0e46 1675
66c7d4c1 1676 while (isalnum (c2))
2f1a1aea 1677 {
9c0c0e46
FCE
1678 // NB: isalnum is very permissive. We rely on strtol, called in
1679 // parser::parse_literal below, to confirm that the number string
1680 // is correctly formatted and in range.
1681
66c7d4c1 1682 input_get ();
aa389a19 1683 token_str.push_back (c2);
66c7d4c1 1684 c2 = input_peek ();
2f1a1aea 1685 }
aa389a19 1686
47d349b1 1687 n->content = token_str;
2f1a1aea
FCE
1688 return n;
1689 }
1690
// String literal: runs to the closing double-quote; end of input or a
// newline before that yields an unclosed-quote junk token.
1691 else if (c == '\"')
1692 {
1693 n->type = tok_string;
1694 while (1)
1695 {
1696 c = input_get ();
1697
3f99432c 1698 if (c < 0 || c == '\n')
2f1a1aea 1699 {
10e7c19d 1700 n->make_junk(tok_junk_unclosed_quote);
16fc963f 1701 return n;
2f1a1aea
FCE
1702 }
1703 if (c == '\"') // closing double-quotes
1704 break;
3f99432c 1705 else if (c == '\\') // see also input_put
dff50e09 1706 {
aa389a19
FCE
1707 c = input_get();
1708 switch (c)
7d46afb8 1709 {
ef8a6134 1710 case 'x':
f8405ea5 1711 if (!has_version("2.3"))
ef8a6134 1712 goto the_default;
c92d3b42 1713 /* FALLTHROUGH */
7d46afb8
GH
1714 case 'a':
1715 case 'b':
1716 case 't':
1717 case 'n':
1718 case 'v':
1719 case 'f':
1720 case 'r':
f03954fd 1721 case '0' ... '7': // NB: need only match the first digit
7d46afb8 1722 case '\\':
7d46afb8 1723 // Pass these escapes through to the string value
dff50e09 1724 // being parsed; it will be emitted into a C literal.
c7c8d469
FCE
1725 // XXX: PR13371: perhaps we should evaluate them here
1726 // (and re-quote them during translate.cxx emission).
aa389a19 1727 token_str.push_back ('\\');
7d46afb8 1728
3f99432c 1729 // fall through
ef8a6134 1730 default: the_default:
aa389a19
FCE
1731 token_str.push_back (c);
1732 break;
7d46afb8 1733 }
2f1a1aea
FCE
1734 }
1735 else
aa389a19 1736 token_str.push_back (c);
2f1a1aea 1737 }
47d349b1 1738 n->content = token_str;
2f1a1aea
FCE
1739 return n;
1740 }
1741
// Punctuation: comments, embedded-code blocks and operators.
1742 else if (ispunct (c))
1743 {
bb2e3076 1744 int c3 = input_peek (1);
2f1a1aea 1745
3a20432b
FCE
1746 // NB: if we were to recognize negative numeric literals here,
1747 // we'd introduce another grammar ambiguity:
1748 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1749 // instead of tok_number(1) tok_operator('-') tok_number(1)
1750
66c7d4c1 1751 if (c == '#') // shell comment
2f1a1aea
FCE
1752 {
1753 unsigned this_line = cursor_line;
bb2e3076
FCE
1754 do { c = input_get (); }
1755 while (c >= 0 && cursor_line == this_line);
fee28e5c 1756 ate_comment = true;
b5477cd9 1757 ate_whitespace = true;
2f1a1aea
FCE
1758 goto skip;
1759 }
66c7d4c1 1760 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
1761 {
1762 unsigned this_line = cursor_line;
bb2e3076
FCE
1763 do { c = input_get (); }
1764 while (c >= 0 && cursor_line == this_line);
fee28e5c 1765 ate_comment = true;
b5477cd9 1766 ate_whitespace = true;
63a7c90e
FCE
1767 goto skip;
1768 }
1769 else if (c == '/' && c2 == '*') // C comment
1770 {
66c7d4c1
JS
1771 (void) input_get (); // swallow '*' already in c2
1772 c = input_get ();
63a7c90e 1773 c2 = input_get ();
bb2e3076 1774 while (c2 >= 0)
63a7c90e 1775 {
66c7d4c1
JS
1776 if (c == '*' && c2 == '/')
1777 break;
63a7c90e
FCE
1778 c = c2;
1779 c2 = input_get ();
63a7c90e 1780 }
fee28e5c 1781 ate_comment = true;
b5477cd9 1782 ate_whitespace = true;
bb2e3076 1783 goto skip;
63a7c90e 1784 }
54dfabe9
FCE
1785 else if (c == '%' && c2 == '{') // embedded code
1786 {
1787 n->type = tok_embedded;
1788 (void) input_get (); // swallow '{' already in c2
66c7d4c1 1789 c = input_get ();
aa389a19 1790 c2 = input_get ();
66c7d4c1 1791 while (c2 >= 0)
54dfabe9 1792 {
66c7d4c1 1793 if (c == '%' && c2 == '}')
46b5cfb2 1794 {
47d349b1 1795 n->content = token_str;
46b5cfb2
AJ
1796 return n;
1797 }
ebbf9df4
FCE
1798 if (c == '}' && c2 == '%') // possible typo
1799 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
aa389a19
FCE
1800 token_str.push_back (c);
1801 c = c2;
1802 c2 = input_get();
54dfabe9 1803 }
72cdb9cd 1804
10e7c19d 1805 n->make_junk(tok_junk_unclosed_embedded);
46b5cfb2 1806 return n;
54dfabe9 1807 }
2f1a1aea 1808
bb2e3076
FCE
1809 // We're committed to recognizing at least the first character
1810 // as an operator.
2f1a1aea 1811 n->type = tok_operator;
aa389a19 1812 token_str = (char) c;
2f1a1aea 1813
bb2e3076 1814 // match all valid operators, in decreasing size order
66c7d4c1 1815 if ((c == '<' && c2 == '<' && c3 == '<') ||
161f6f7b 1816 (c == '>' && c2 == '>' && c3 == '>') ||
66c7d4c1
JS
1817 (c == '<' && c2 == '<' && c3 == '=') ||
1818 (c == '>' && c2 == '>' && c3 == '='))
82919855 1819 {
aa389a19
FCE
1820 token_str.push_back (c2);
1821 token_str.push_back (c3);
46b5cfb2 1822 input_get (); // c2
46b5cfb2 1823 input_get (); // c3
bb2e3076 1824 }
66c7d4c1
JS
1825 else if ((c == '=' && c2 == '=') ||
1826 (c == '!' && c2 == '=') ||
1827 (c == '<' && c2 == '=') ||
1828 (c == '>' && c2 == '=') ||
93daaca8
SM
1829 (c == '=' && c2 == '~') ||
1830 (c == '!' && c2 == '~') ||
66c7d4c1
JS
1831 (c == '+' && c2 == '=') ||
1832 (c == '-' && c2 == '=') ||
1833 (c == '*' && c2 == '=') ||
1834 (c == '/' && c2 == '=') ||
1835 (c == '%' && c2 == '=') ||
1836 (c == '&' && c2 == '=') ||
1837 (c == '^' && c2 == '=') ||
1838 (c == '|' && c2 == '=') ||
1839 (c == '.' && c2 == '=') ||
1840 (c == '&' && c2 == '&') ||
1841 (c == '|' && c2 == '|') ||
1842 (c == '+' && c2 == '+') ||
1843 (c == '-' && c2 == '-') ||
1844 (c == '-' && c2 == '>') ||
1845 (c == '<' && c2 == '<') ||
1846 (c == '>' && c2 == '>') ||
177a8ead 1847 // preprocessor tokens
66c7d4c1
JS
1848 (c == '%' && c2 == '(') ||
1849 (c == '%' && c2 == '?') ||
1850 (c == '%' && c2 == ':') ||
1851 (c == '%' && c2 == ')'))
bb2e3076 1852 {
aa389a19 1853 token_str.push_back (c2);
bb2e3076 1854 input_get (); // swallow other character
dff50e09 1855 }
2f1a1aea 1856
47d349b1 1857 n->content = token_str;
2f1a1aea
FCE
1858 return n;
1859 }
1860
// Any other character: junk token whose content is the character
// written as a \xNN hex escape.
1861 else
1862 {
1863 n->type = tok_junk;
e3795795
FCE
1864 ostringstream s;
1865 s << "\\x" << hex << setw(2) << setfill('0') << c;
47d349b1 1866 n->content = s.str();
10e7c19d
JS
1867 // signal parser to emit "expected X, found junk" type error
1868 n->make_junk(tok_junk_unknown);
2f1a1aea
FCE
1869 return n;
1870 }
1871}
1872
16fc963f
SM
1873// ------------------------------------------------------------------------
1874
1875void
10e7c19d 1876token::make_junk (token_junk_type junk)
16fc963f
SM
1877{
1878 type = tok_junk;
10e7c19d
JS
1879 junk_type = junk;
1880}
1881
1882// ------------------------------------------------------------------------
1883
1884string
1885token::junk_message(systemtap_session& session) const
1886{
1887 switch (junk_type)
1888 {
1889 case tok_junk_nested_arg:
1890 return _("invalid nested substitution of command line arguments");
1891
1892 case tok_junk_invalid_arg:
1893 return _F("command line argument out of range [1-%lu]",
1894 (unsigned long) session.args.size());
1895
1896 case tok_junk_unclosed_quote:
1897 return _("Could not find matching closing quote");
1898
1899 case tok_junk_unclosed_embedded:
1900 return _("Could not find matching '%}' to close embedded function block");
1901
1902 default:
1903 return _("unknown junk token");
1904 }
16fc963f 1905}
2f1a1aea
FCE
1906
1907// ------------------------------------------------------------------------
1908
1909stapfile*
f8405ea5 1910parser::parse ()
2f1a1aea
FCE
1911{
1912 stapfile* f = new stapfile;
15f4ba98 1913 f->privileged = this->privileged;
1b1b4ceb 1914 input.set_current_file (f);
56099f08
FCE
1915
1916 bool empty = true;
1917
2f1a1aea
FCE
1918 while (1)
1919 {
1920 try
1921 {
a07a2c28 1922 systemtap_v_seen = 0;
2f1a1aea 1923 const token* t = peek ();
534aad8b 1924 if (! t) // nice clean EOF, modulo any preprocessing that occurred
2f1a1aea
FCE
1925 break;
1926
56099f08 1927 empty = false;
6e213f58
DS
1928 if (t->type == tok_keyword && t->content == "probe")
1929 {
1930 context = con_probe;
1931 parse_probe (f->probes, f->aliases);
1932 }
38bf68a8
MC
1933 else if (t->type == tok_keyword && t->content == "private")
1934 {
1935 context = con_unknown;
1936 parse_private (f->globals, f->probes, f->name, f->functions);
1937 }
6e213f58
DS
1938 else if (t->type == tok_keyword && t->content == "global")
1939 {
1940 context = con_global;
38bf68a8 1941 parse_global (f->globals, f->probes, f->name);
6e213f58
DS
1942 }
1943 else if (t->type == tok_keyword && t->content == "function")
1944 {
1945 context = con_function;
38bf68a8 1946 parse_functiondecl (f->functions, f->name);
6e213f58 1947 }
54dfabe9 1948 else if (t->type == tok_embedded)
6e213f58
DS
1949 {
1950 context = con_embedded;
1951 f->embeds.push_back (parse_embeddedcode ());
1952 }
2f1a1aea 1953 else
6e213f58
DS
1954 {
1955 context = con_unknown;
42eed2a0 1956 throw PARSE_ERROR (_("expected 'probe', 'global', 'private', 'function', or '%{'"));
6e213f58 1957 }
2f1a1aea
FCE
1958 }
1959 catch (parse_error& pe)
1960 {
7ac01ea0 1961 print_error (pe, errs_as_warnings);
16fc963f
SM
1962
1963 // XXX: do we want tok_junk to be able to force skip_some behaviour?
cd7116b8 1964 if (pe.skip_some) // for recovery
46954f1d
FCE
1965 // Quietly swallow all tokens until the next keyword we can start parsing from.
1966 while (1)
1967 try
1968 {
cd7116b8
FCE
1969 {
1970 const token* t = peek ();
1971 if (! t)
1972 break;
46954f1d 1973 if (t->type == tok_keyword && t->content == "probe") break;
42eed2a0 1974 else if (t->type == tok_keyword && t->content == "private") break;
46954f1d
FCE
1975 else if (t->type == tok_keyword && t->content == "global") break;
1976 else if (t->type == tok_keyword && t->content == "function") break;
1977 else if (t->type == tok_embedded) break;
731a5359 1978 swallow (); // swallow it
cd7116b8 1979 }
46954f1d
FCE
1980 }
1981 catch (parse_error& pe2)
1982 {
1983 // parse error during recovery ... ugh
1984 print_error (pe2);
1985 }
177a8ead 1986 }
2f1a1aea
FCE
1987 }
1988
4bc2b5cd 1989 if (empty && user_file)
56099f08 1990 {
534aad8b
SM
1991 // vary message depending on whether file was *actually* empty:
1992 cerr << (input.saw_tokens
1993 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
4cd32d8c 1994 : _F("Input file '%s' is empty.", input_name.c_str()))
534aad8b 1995 << endl;
56099f08 1996 delete f;
2203b032 1997 f = 0;
56099f08
FCE
1998 }
1999 else if (num_errors > 0)
2f1a1aea 2000 {
52c2652f 2001 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
2f1a1aea 2002 delete f;
2203b032 2003 f = 0;
2f1a1aea 2004 }
dff50e09 2005
2203b032 2006 input.set_current_file(0);
2f1a1aea
FCE
2007 return f;
2008}
2009
2010
101b0805 2011probe*
f8405ea5 2012parser::parse_synthetic_probe (const token* chain)
101b0805
JS
2013{
2014 probe* p = NULL;
2015 stapfile* f = new stapfile;
15f4ba98 2016 f->privileged = this->privileged;
101b0805
JS
2017 f->synthetic = true;
2018 input.set_current_file (f);
2019 input.set_current_token_chain (chain);
2020
2021 try
2022 {
2023 context = con_probe;
2024 parse_probe (f->probes, f->aliases);
2025
2026 if (f->probes.size() != 1 || !f->aliases.empty())
2027 throw PARSE_ERROR (_("expected a single synthetic probe"));
2028 p = f->probes[0];
2029 }
2030 catch (parse_error& pe)
2031 {
2032 print_error (pe, errs_as_warnings);
2033 }
2034
2035 // TODO check for unparsed tokens?
2036
2037 input.set_current_file(0);
2038 input.set_current_token_chain(0);
e7540f55 2039 p->synthetic = true;
101b0805
JS
2040 return p;
2041}
2042
2043
20c6c071 2044void
aa389a19
FCE
2045parser::parse_probe (vector<probe *> & probe_ret,
2046 vector<probe_alias *> & alias_ret)
2f1a1aea 2047{
82919855 2048 const token* t0 = next ();
6e213f58 2049 if (! (t0->type == tok_keyword && t0->content == "probe"))
f0454224 2050 throw PARSE_ERROR (_("expected 'probe'"));
82919855 2051
20c6c071
GH
2052 vector<probe_point *> aliases;
2053 vector<probe_point *> locations;
2054
97266278
LG
2055 int epilogue_alias = 0;
2056
2f1a1aea
FCE
2057 while (1)
2058 {
05f925e9 2059 vector<probe_point*> pps = parse_probe_points();
dff50e09 2060
b4ceace2 2061 const token* t = peek ();
380d759b 2062 if (pps.size() == 1 && t
b4ceace2
FCE
2063 && t->type == tok_operator && t->content == "=")
2064 {
380d759b
FL
2065 if (pps[0]->optional || pps[0]->sufficient)
2066 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2067 aliases.push_back(pps[0]);
731a5359 2068 swallow ();
b4ceace2
FCE
2069 continue;
2070 }
380d759b 2071 else if (pps.size() == 1 && t
97266278
LG
2072 && t->type == tok_operator && t->content == "+=")
2073 {
380d759b
FL
2074 if (pps[0]->optional || pps[0]->sufficient)
2075 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pps[0]->components.front()->tok);
2076 aliases.push_back(pps[0]);
97266278 2077 epilogue_alias = 1;
731a5359 2078 swallow ();
97266278
LG
2079 continue;
2080 }
b4ceace2
FCE
2081 else if (t && t->type == tok_operator && t->content == "{")
2082 {
380d759b 2083 locations.insert(locations.end(), pps.begin(), pps.end());
b4ceace2
FCE
2084 break;
2085 }
2f1a1aea 2086 else
f0454224 2087 throw PARSE_ERROR (_("expected probe point specifier"));
2f1a1aea 2088 }
20c6c071 2089
20c6c071
GH
2090 if (aliases.empty())
2091 {
54dfabe9
FCE
2092 probe* p = new probe;
2093 p->tok = t0;
2094 p->locations = locations;
2095 p->body = parse_stmt_block ();
37ebca01 2096 p->privileged = privileged;
a07a2c28 2097 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 2098 probe_ret.push_back (p);
20c6c071
GH
2099 }
2100 else
2101 {
54dfabe9 2102 probe_alias* p = new probe_alias (aliases);
97266278
LG
2103 if(epilogue_alias)
2104 p->epilogue_style = true;
2105 else
2106 p->epilogue_style = false;
54dfabe9
FCE
2107 p->tok = t0;
2108 p->locations = locations;
2109 p->body = parse_stmt_block ();
37ebca01 2110 p->privileged = privileged;
a07a2c28 2111 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 2112 alias_ret.push_back (p);
20c6c071 2113 }
54dfabe9 2114}
20c6c071 2115
54dfabe9
FCE
2116
2117embeddedcode*
2118parser::parse_embeddedcode ()
2119{
2120 embeddedcode* e = new embeddedcode;
2121 const token* t = next ();
2122 if (t->type != tok_embedded)
f0454224 2123 throw PARSE_ERROR (_("expected '%{'"));
24cb178f
FCE
2124
2125 if (! privileged)
f0454224 2126 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
cd7116b8 2127 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
2128
2129 e->tok = t;
47d349b1 2130 e->code = t->content;
54dfabe9 2131 return e;
2f1a1aea
FCE
2132}
2133
2134
2135block*
56099f08 2136parser::parse_stmt_block ()
2f1a1aea
FCE
2137{
2138 block* pb = new block;
2139
56099f08
FCE
2140 const token* t = next ();
2141 if (! (t->type == tok_operator && t->content == "{"))
f0454224 2142 throw PARSE_ERROR (_("expected '{'"));
56099f08
FCE
2143
2144 pb->tok = t;
2b066ec1 2145
2f1a1aea
FCE
2146 while (1)
2147 {
46954f1d
FCE
2148 t = peek ();
2149 if (t && t->type == tok_operator && t->content == "}")
2150 {
731a5359 2151 swallow ();
46954f1d
FCE
2152 break;
2153 }
2154 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
2155 }
2156
2157 return pb;
2158}
2159
2160
f4fe2e93
FCE
2161try_block*
2162parser::parse_try_block ()
2163{
2164 try_block* pb = new try_block;
2165
731a5359 2166 pb->tok = expect_kw_token ("try");
f4fe2e93
FCE
2167 pb->try_block = parse_stmt_block();
2168 expect_kw ("catch");
2169
2170 const token* t = peek ();
3819d181 2171 if (t != NULL && t->type == tok_operator && t->content == "(")
f4fe2e93 2172 {
731a5359 2173 swallow (); // swallow the '('
f4fe2e93
FCE
2174
2175 t = next();
2176 if (! (t->type == tok_identifier))
f0454224 2177 throw PARSE_ERROR (_("expected identifier"));
f4fe2e93
FCE
2178 symbol* sym = new symbol;
2179 sym->tok = t;
a3e980f9 2180 sym->name = t->content;
f4fe2e93
FCE
2181 pb->catch_error_var = sym;
2182
2183 expect_op (")");
2184 }
2185 else
2186 pb->catch_error_var = 0;
2187
2188 pb->catch_block = parse_stmt_block();
2189
2190 return pb;
2191}
2192
2193
2194
2f1a1aea
FCE
2195statement*
2196parser::parse_statement ()
2197{
40b71c47 2198 statement *ret;
2f1a1aea
FCE
2199 const token* t = peek ();
2200 if (t && t->type == tok_operator && t->content == ";")
f946b10f 2201 return new null_statement (next ());
dff50e09 2202 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 2203 return parse_stmt_block (); // Don't squash semicolons.
f4fe2e93
FCE
2204 else if (t && t->type == tok_keyword && t->content == "try")
2205 return parse_try_block (); // Don't squash semicolons.
6e213f58 2206 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 2207 return parse_if_statement (); // Don't squash semicolons.
6e213f58 2208 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 2209 return parse_for_loop (); // Don't squash semicolons.
6e213f58 2210 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
2211 return parse_foreach_loop (); // Don't squash semicolons.
2212 else if (t && t->type == tok_keyword && t->content == "while")
2213 return parse_while_loop (); // Don't squash semicolons.
6e213f58 2214 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 2215 ret = parse_return_statement ();
6e213f58 2216 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 2217 ret = parse_delete_statement ();
6e213f58 2218 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 2219 ret = parse_break_statement ();
6e213f58 2220 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 2221 ret = parse_continue_statement ();
6e213f58 2222 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 2223 ret = parse_next_statement ();
2f1a1aea
FCE
2224 else if (t && (t->type == tok_operator || // expressions are flexible
2225 t->type == tok_identifier ||
2226 t->type == tok_number ||
7d902887
FCE
2227 t->type == tok_string ||
2228 t->type == tok_embedded ))
40b71c47 2229 ret = parse_expr_statement ();
54dfabe9 2230 // XXX: consider generally accepting tok_embedded here too
2f1a1aea 2231 else
f0454224 2232 throw PARSE_ERROR (_("expected statement"));
40b71c47
MW
2233
2234 // Squash "empty" trailing colons after any "non-block-like" statement.
2235 t = peek ();
2236 if (t && t->type == tok_operator && t->content == ";")
2237 {
731a5359 2238 swallow (); // Silently eat trailing ; after statement
40b71c47
MW
2239 }
2240
2241 return ret;
2f1a1aea
FCE
2242}
2243
38bf68a8 2244void
f41e297c
JS
2245parser::parse_private (vector <vardecl*>& globals, vector<probe*>& probes,
2246 string const & fname, vector<functiondecl*>& functions)
38bf68a8
MC
2247{
2248 const token* t = next ();
127e4e36 2249 if (! (t->type == tok_keyword && t->content == "private"))
38bf68a8
MC
2250 throw PARSE_ERROR (_("expected 'private'"));
2251 swallow ();
2252 t = next ();
2253 if (t->type == tok_keyword && t->content == "function")
2254 {
2255 swallow ();
2256 context = con_function;
2257 do_parse_functiondecl(functions, t, fname, true);
2258 }
2259 else if (t->type == tok_keyword && t->content == "global")
2260 {
2261 swallow ();
2262 context = con_global;
2263 t = next ();
127e4e36 2264 if (! (t->type == tok_identifier))
38bf68a8
MC
2265 throw PARSE_ERROR (_("expected identifier"));
2266 do_parse_global(globals, probes, fname, t, true);
2267 }
2268 // The `private <identifier>` is an acceptable shorthand
2269 // for `private global <identifier>` per above.
2270 else if (t->type == tok_identifier)
2271 {
2272 context = con_global;
2273 do_parse_global(globals, probes, fname, t, true);
2274 }
2275 else
2276 throw PARSE_ERROR (_("expected 'function' or identifier"));
2277}
2f1a1aea 2278
56099f08 2279void
f41e297c
JS
2280parser::parse_global (vector <vardecl*>& globals, vector<probe*>& probes,
2281 string const & fname)
2f1a1aea 2282{
82919855 2283 const token* t0 = next ();
6e213f58 2284 if (! (t0->type == tok_keyword && t0->content == "global"))
38bf68a8 2285 throw PARSE_ERROR (_("expected 'global' or 'private'"));
731a5359 2286 swallow ();
38bf68a8
MC
2287 do_parse_global(globals, probes, fname, 0, false);
2288}
82919855 2289
38bf68a8 2290void
f41e297c
JS
2291parser::do_parse_global (vector <vardecl*>& globals, vector<probe*>&,
2292 string const & fname, const token* t0, bool priv)
38bf68a8
MC
2293{
2294 bool iter0 = true;
2295 const token* t;
56099f08
FCE
2296 while (1)
2297 {
42eed2a0
MC
2298 t = (iter0 && priv) ? t0 : next ();
2299 iter0 = false;
56099f08 2300 if (! (t->type == tok_identifier))
f0454224 2301 throw PARSE_ERROR (_("expected identifier"));
56099f08 2302
c8fbf931
MC
2303 string gname = "__global_" + string(t->content);
2304 string pname = "__private_" + detox_path(fname) + string(t->content);
2305 string name = priv ? pname : gname;
2306
2b066ec1 2307 for (unsigned i=0; i<globals.size(); i++)
c8fbf931
MC
2308 {
2309 if (globals[i]->name == name)
f0454224 2310 throw PARSE_ERROR (_("duplicate global name"));
c8fbf931
MC
2311 if ((globals[i]->name == gname) || (globals[i]->name == pname))
2312 throw PARSE_ERROR (_("global versus private variable declaration conflict"));
2313 }
38bf68a8 2314
24cb178f 2315 vardecl* d = new vardecl;
9fef07ff 2316 d->unmangled_name = t->content;
38bf68a8 2317 d->name = name;
24cb178f 2318 d->tok = t;
a07a2c28 2319 d->systemtap_v_conditional = systemtap_v_seen;
24cb178f 2320 globals.push_back (d);
56099f08 2321
82919855 2322 t = peek ();
ef474d24 2323
74e6cc92
CM
2324 if(t && t->type == tok_operator && t->content == "%") //wrapping
2325 {
2326 d->wrap = true;
731a5359 2327 swallow ();
74e6cc92
CM
2328 t = peek();
2329 }
2330
ef474d24
JS
2331 if (t && t->type == tok_operator && t->content == "[") // array size
2332 {
2333 int64_t size;
731a5359 2334 swallow ();
ef474d24 2335 expect_number(size);
de506189 2336 if (size <= 0 || size > INT_MAX)
f0454224 2337 throw PARSE_ERROR(_("array size out of range"));
ef474d24
JS
2338 d->maxsize = (int)size;
2339 expect_known(tok_operator, "]");
2340 t = peek ();
2341 }
2342
4b5f3e45 2343 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
2344 {
2345 if (!d->compatible_arity(0))
f0454224 2346 throw PARSE_ERROR(_("only scalar globals can be initialized"));
58701b78 2347 d->set_arity(0, t);
731a5359 2348 next (); // Don't swallow, set_arity() used the peeked token.
ef474d24
JS
2349 d->init = parse_literal ();
2350 d->type = d->init->type;
2351 t = peek ();
2352 }
4b5f3e45 2353
c3799d72 2354 if (t && t->type == tok_operator && t->content == ";") // termination
950da622 2355 {
731a5359 2356 swallow ();
950da622
MW
2357 break;
2358 }
c3799d72 2359
4b5f3e45 2360 if (t && t->type == tok_operator && t->content == ",") // next global
82919855 2361 {
731a5359 2362 swallow ();
82919855
FCE
2363 continue;
2364 }
56099f08 2365 else
82919855 2366 break;
56099f08
FCE
2367 }
2368}
2369
24cb178f 2370void
f41e297c
JS
2371parser::parse_functiondecl (vector<functiondecl*>& functions,
2372 string const & fname)
56099f08 2373{
82919855 2374 const token* t = next ();
6e213f58 2375 if (! (t->type == tok_keyword && t->content == "function"))
f0454224 2376 throw PARSE_ERROR (_("expected 'function'"));
731a5359 2377 swallow ();
38bf68a8
MC
2378 do_parse_functiondecl(functions, t, fname, false);
2379}
56099f08 2380
38bf68a8 2381void
f41e297c
JS
2382parser::do_parse_functiondecl (vector<functiondecl*>& functions, const token* t,
2383 string const & fname, bool priv)
38bf68a8 2384{
82919855 2385 t = next ();
6e213f58
DS
2386 if (! (t->type == tok_identifier)
2387 && ! (t->type == tok_keyword
2388 && (t->content == "string" || t->content == "long")))
f0454224 2389 throw PARSE_ERROR (_("expected identifier"));
24cb178f 2390
c8fbf931
MC
2391 string gname = "__global_" + string(t->content);
2392 string pname = "__private_" + detox_path(fname) + string(t->content);
2393 string name = priv ? pname : gname;
7b5b30a8 2394 name += "__overload_" + lex_cast(session.overload_count[t->content]++);
38bf68a8 2395
24cb178f 2396 functiondecl *fd = new functiondecl ();
9fef07ff 2397 fd->unmangled_name = t->content;
38bf68a8 2398 fd->name = name;
56099f08
FCE
2399 fd->tok = t;
2400
2401 t = next ();
6a505121
FCE
2402 if (t->type == tok_operator && t->content == ":")
2403 {
731a5359 2404 swallow ();
6a505121 2405 t = next ();
6e213f58 2406 if (t->type == tok_keyword && t->content == "string")
6a505121 2407 fd->type = pe_string;
6e213f58 2408 else if (t->type == tok_keyword && t->content == "long")
6a505121 2409 fd->type = pe_long;
f0454224 2410 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
731a5359 2411 swallow ();
6a505121
FCE
2412
2413 t = next ();
2414 }
2415
56099f08 2416 if (! (t->type == tok_operator && t->content == "("))
f0454224 2417 throw PARSE_ERROR (_("expected '('"));
731a5359 2418 swallow ();
56099f08
FCE
2419
2420 while (1)
2421 {
2422 t = next ();
2423
100a540e 2424 // permit zero-argument functions
56099f08 2425 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2426 {
2427 swallow ();
2428 break;
2429 }
56099f08 2430 else if (! (t->type == tok_identifier))
f0454224 2431 throw PARSE_ERROR (_("expected identifier"));
56099f08 2432 vardecl* vd = new vardecl;
9fef07ff 2433 vd->unmangled_name = vd->name = t->content;
59093206
CS
2434
2435 for (auto it = fd->formal_args.begin() ; it != fd->formal_args.end(); ++it)
2436 {
2437 string param = vd->unmangled_name;
2438 if ((*it)->unmangled_name == param)
2439 throw PARSE_ERROR(_("duplicate parameter names"));
2440 }
2441
56099f08
FCE
2442 vd->tok = t;
2443 fd->formal_args.push_back (vd);
a07a2c28 2444 fd->systemtap_v_conditional = systemtap_v_seen;
56099f08
FCE
2445
2446 t = next ();
59093206 2447
6a505121
FCE
2448 if (t->type == tok_operator && t->content == ":")
2449 {
731a5359 2450 swallow ();
6a505121 2451 t = next ();
6e213f58 2452 if (t->type == tok_keyword && t->content == "string")
6a505121 2453 vd->type = pe_string;
6e213f58 2454 else if (t->type == tok_keyword && t->content == "long")
6a505121 2455 vd->type = pe_long;
f0454224 2456 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
731a5359 2457 swallow ();
6a505121
FCE
2458 t = next ();
2459 }
56099f08 2460 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2461 {
2462 swallow ();
2463 break;
59093206 2464 }
56099f08 2465 if (t->type == tok_operator && t->content == ",")
731a5359
MW
2466 {
2467 swallow ();
2468 continue;
2469 }
56099f08 2470 else
f0454224 2471 throw PARSE_ERROR (_("expected ',' or ')'"));
56099f08
FCE
2472 }
2473
7b5b30a8
FL
2474 t = peek();
2475 if (t->type == tok_operator && t->content == ":")
2476 {
7b5b30a8 2477 swallow();
c0d0d623 2478 literal* literal = parse_literal();
28f6e1fe
FCE
2479 literal_number* ln = dynamic_cast<literal_number*>(literal);
2480 if (ln == 0)
2481 throw PARSE_ERROR (_("expected literal number"));
2482 fd->priority = ln->value;
2483
7b5b30a8 2484 // reserve priority 0 for user script implementation
c0d0d623 2485 if (fd->priority < 1)
7b5b30a8 2486 throw PARSE_ERROR (_("specified priority must be > 0"));
c0d0d623 2487 delete literal;
7b5b30a8
FL
2488 }
2489 else if (user_file)
2490 {
2491 // allow script file implementation override automatically when
2492 // priority not specified
2493 fd->priority = 0;
2494 }
2495
54dfabe9
FCE
2496 t = peek ();
2497 if (t && t->type == tok_embedded)
2498 fd->body = parse_embeddedcode ();
2499 else
2500 fd->body = parse_stmt_block ();
24cb178f
FCE
2501
2502 functions.push_back (fd);
2f1a1aea
FCE
2503}
2504
380d759b 2505vector<probe_point*>
05f925e9 2506parser::parse_probe_points()
2f1a1aea 2507{
380d759b
FL
2508 vector<probe_point*> pps;
2509 while (1)
2510 {
05f925e9 2511 vector<probe_point*> tail = parse_components();
380d759b
FL
2512 pps.insert(pps.end(), tail.begin(), tail.end());
2513
2514 const token* t = peek();
2515 if (t && t->type == tok_operator && t->content == ",")
2516 {
2517 swallow();
2518 continue;
2519 }
2520
2521 if (t && t->type == tok_operator
2522 && (t->content == "{" || t->content == "=" ||
2523 t->content == "+="|| t->content == "}"))
2524 break;
2f1a1aea 2525
380d759b
FL
2526 throw PARSE_ERROR (_("expected one of ', { } = +='"));
2527 }
2528 return pps;
2529}
2530
2531vector<probe_point*>
05f925e9 2532parser::parse_components()
380d759b
FL
2533{
2534 vector<probe_point*> pps;
9c0c0e46 2535 while (1)
2f1a1aea 2536 {
05f925e9 2537 vector<probe_point*> suffix = parse_component();
9c0c0e46 2538
380d759b
FL
2539 // Cartesian product of components
2540 if (pps.empty())
2541 pps = suffix;
2542 else
2543 {
2544 assert(!suffix.empty());
2545 vector<probe_point*> product;
2546 for (unsigned i = 0; i < pps.size(); i++)
2547 {
2548 if (pps[i]->optional || pps[i]->sufficient || pps[i]->condition)
2549 throw PARSE_ERROR (_("'?', '!' or condition must only be specified in suffix"),
2550 pps[i]->components[0]->tok);
2551 for (unsigned j = 0; j < suffix.size(); j++)
2552 {
2553 probe_point* pp = new probe_point;
2554 pp->components.insert(pp->components.end(),
05f925e9
FL
2555 pps[i]->components.begin(),
2556 pps[i]->components.end());
380d759b 2557 pp->components.insert(pp->components.end(),
05f925e9
FL
2558 suffix[j]->components.begin(),
2559 suffix[j]->components.end());
380d759b
FL
2560 pp->optional = suffix[j]->optional;
2561 pp->sufficient = suffix[j]->sufficient;
c68fbc62
FL
2562 if (auto_path)
2563 pp->auto_path = suffix[j]->auto_path;
380d759b
FL
2564 pp->condition = suffix[j]->condition;
2565 product.push_back(pp);
2566 }
2567 }
2568 for (unsigned i = 0; i < pps.size(); i++) delete pps[i];
2569 for (unsigned i = 0; i < suffix.size(); i++) delete suffix[i];
2570 pps = product;
2571 }
2572
2573 const token* t = peek();
2574 if (t && t->type == tok_operator && t->content == ".")
2575 {
2576 swallow ();
2577 continue;
2578 }
2579
2580 // We only fall through here at the end of a probe point (past
2581 // all the dotted/parametrized components).
2582
2583 if (t && t->type == tok_operator &&
2584 (t->content == "?" || t->content == "!"))
2585 {
2586 for (unsigned i = 0; i < pps.size(); i++)
2587 {
2588 if (pps[i]->optional || pps[i]->sufficient)
2589 throw PARSE_ERROR (_("'?' or '!' respecified"));
2590 pps[i]->optional = true;
2591 if (t->content == "!") pps[i]->sufficient = true;
2592 }
2593 // NB: sufficient implies optional
2594 swallow ();
2595 t = peek ();
2596 // fall through
2597 }
2598
2599 if (t && t->type == tok_keyword && t->content == "if")
2600 {
2601 swallow ();
2602 t = peek ();
2603 if (!(t && t->type == tok_operator && t->content == "("))
2604 throw PARSE_ERROR (_("expected '('"));
2605 swallow ();
2606
2607 expression* e = parse_expression();
2608 for (unsigned i = 0; i < pps.size(); i++)
2609 {
2610 if (pps[i]->condition != 0)
2611 throw PARSE_ERROR (_("condition respecified"));
2612 pps[i]->condition = e;
2613 }
2614
2615 t = peek ();
2616 if (!(t && t->type == tok_operator && t->content == ")"))
2617 throw PARSE_ERROR (_("expected ')'"));
2618 swallow ();
2619 }
2620
2621 break;
2622 }
2623 return pps;
2624}
2625
2626vector<probe_point*>
05f925e9 2627parser::parse_component()
380d759b
FL
2628{
2629 const token* t = next ();
2630 if (! (t->type == tok_identifier
2631 // we must allow ".return" and ".function", which are keywords
2632 || t->type == tok_keyword
2633 // we must allow "*", due to being an operator
2634 || (t->type == tok_operator && (t->content == "*" || t->content == "{"))))
2635 throw PARSE_ERROR (_("expected identifier or '*' or '{'"));
2636
2637 if (t && t->type == tok_operator && t->content == "{")
2638 {
2639 swallow();
05f925e9 2640 vector<probe_point*> pps = parse_probe_points();
380d759b
FL
2641 t = peek();
2642 if (!(t && t->type == tok_operator && t->content == "}"))
2643 throw PARSE_ERROR (_("expected '}'"));
2644 swallow();
2645 return pps;
2646 }
2647 else
2648 {
b5477cd9 2649 // loop which reconstitutes an identifier with wildcards
47d349b1 2650 string content = t->content;
eadd685c 2651 bool changed_p = false;
b5477cd9
SM
2652 while (1)
2653 {
2654 const token* u = peek();
3819d181
MW
2655 if (u == NULL)
2656 break;
b5477cd9
SM
2657 // ensure pieces of the identifier are adjacent:
2658 if (input.ate_whitespace)
2659 break;
2660 // ensure pieces of the identifier are valid:
2661 if (! (u->type == tok_identifier
2662 // we must allow arbitrary keywords with a wildcard
2663 || u->type == tok_keyword
2664 // we must allow "*", due to being an operator
2665 || (u->type == tok_operator && u->content == "*")))
2666 break;
2667
2668 // append u to t
47d349b1 2669 content = content + (string)u->content;
eadd685c 2670 changed_p = true;
380d759b 2671
b5477cd9 2672 // consume u
731a5359 2673 swallow ();
b5477cd9 2674 }
eadd685c
FCE
2675
2676 if (changed_p)
2677 {
2678 // We've already swallowed the first token and we're not
2679 // putting it back; no one else has a copy; so we can
2680 // safely overwrite its content and reuse it.
2681 const_cast<token*>(t)->content = content;
2682 }
9c0c0e46
FCE
2683
2684 probe_point::component* c = new probe_point::component;
a3e980f9 2685 c->functor = t->content;
f1a0157a 2686 c->tok = t;
380d759b
FL
2687 vector<probe_point*> pps;
2688 probe_point* pp = new probe_point;
c68fbc62
FL
2689 if (auto_path)
2690 pp->auto_path = input_name;
380d759b
FL
2691 pp->components.push_back(c);
2692 pps.push_back(pp);
6e3347a9 2693 // NB we may add c->arg soon
9c0c0e46
FCE
2694
2695 t = peek ();
a477f3f1 2696
6e3347a9 2697 // consume optional parameter
9c0c0e46
FCE
2698 if (t && t->type == tok_operator && t->content == "(")
2699 {
731a5359 2700 swallow (); // consume "("
9c0c0e46
FCE
2701 c->arg = parse_literal ();
2702
2703 t = next ();
2704 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2705 throw PARSE_ERROR (_("expected ')'"));
731a5359 2706 swallow ();
6e3347a9 2707 }
3c4ac468 2708
380d759b 2709 return pps;
2f1a1aea 2710 }
2f1a1aea
FCE
2711}
2712
d24f1ff4
SM
2713literal_string*
2714parser::consume_string_literals(const token *t)
2715{
0ad0aae2 2716 literal_string *ls = new literal_string (t->content);
d24f1ff4
SM
2717
2718 // PR11208: check if the next token is also a string literal;
2719 // auto-concatenate it. This is complicated to the extent that we
2720 // need to skip intermediate whitespace.
2721 //
2722 // NB for versions prior to 2.0: but don't skip over intervening comments
0ad0aae2
JS
2723 string concat;
2724 bool p_concat = false;
d24f1ff4
SM
2725 const token *n = peek();
2726 while (n != NULL && n->type == tok_string
f8405ea5 2727 && ! (!input.has_version("2.0") && input.ate_comment))
d24f1ff4 2728 {
0ad0aae2
JS
2729 if (!p_concat)
2730 {
2731 concat = t->content;
2732 p_concat = true;
2733 }
2734 concat.append(n->content.data(), n->content.size());
2735 next(); // consume the token
d24f1ff4
SM
2736 n = peek();
2737 }
0ad0aae2
JS
2738 if (p_concat)
2739 ls->value = concat;
d24f1ff4
SM
2740 return ls;
2741}
2742
2743
2744// Parse a string literal and perform backslash escaping on the contents:
2745literal_string*
2746parser::parse_literal_string ()
2747{
2748 const token* t = next ();
2749 literal_string* l;
2750 if (t->type == tok_string)
2751 l = consume_string_literals (t);
2752 else
f0454224 2753 throw PARSE_ERROR (_("expected literal string"));
d24f1ff4
SM
2754
2755 l->tok = t;
2756 return l;
2757}
2758
2759
2f1a1aea
FCE
2760literal*
2761parser::parse_literal ()
2762{
2763 const token* t = next ();
56099f08 2764 literal* l;
2f1a1aea 2765 if (t->type == tok_string)
c5be7511 2766 {
d24f1ff4 2767 l = consume_string_literals (t);
c5be7511 2768 }
16e8f21f 2769 else
9c0c0e46 2770 {
16e8f21f
JS
2771 bool neg = false;
2772 if (t->type == tok_operator && t->content == "-")
2773 {
2774 neg = true;
731a5359 2775 swallow ();
16e8f21f
JS
2776 t = next ();
2777 }
2778
2779 if (t->type == tok_number)
2780 {
7371cd19
JS
2781 const string& s = t->content;
2782 const char* startp = s.c_str ();
16e8f21f
JS
2783 char* endp = (char*) startp;
2784
2785 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2786 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
2787 // since the lexer only gives us positive digit strings, but we'll
2788 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
2789 errno = 0;
2790 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 2791 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 2792 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
2793 || (unsigned long long) value > 18446744073709551615ULL
2794 || value < -9223372036854775807LL-1)
f0454224 2795 throw PARSE_ERROR (_("number invalid or out of range"));
16e8f21f 2796
79e6d33f
JS
2797 if (neg)
2798 value = -value;
2799
16e8f21f
JS
2800 l = new literal_number (value);
2801 }
2802 else
f0454224 2803 throw PARSE_ERROR (_("expected literal string or number"));
9c0c0e46 2804 }
56099f08
FCE
2805
2806 l->tok = t;
2807 return l;
2f1a1aea
FCE
2808}
2809
2810
2811if_statement*
2812parser::parse_if_statement ()
2813{
2814 const token* t = next ();
6e213f58 2815 if (! (t->type == tok_keyword && t->content == "if"))
f0454224 2816 throw PARSE_ERROR (_("expected 'if'"));
56099f08
FCE
2817 if_statement* s = new if_statement;
2818 s->tok = t;
2819
2820 t = next ();
2f1a1aea 2821 if (! (t->type == tok_operator && t->content == "("))
f0454224 2822 throw PARSE_ERROR (_("expected '('"));
731a5359 2823 swallow ();
2f1a1aea 2824
2f1a1aea
FCE
2825 s->condition = parse_expression ();
2826
2827 t = next ();
2828 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2829 throw PARSE_ERROR (_("expected ')'"));
731a5359 2830 swallow ();
2f1a1aea
FCE
2831
2832 s->thenblock = parse_statement ();
2833
2834 t = peek ();
6e213f58 2835 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea 2836 {
731a5359 2837 swallow ();
2f1a1aea
FCE
2838 s->elseblock = parse_statement ();
2839 }
ed10c639
FCE
2840 else
2841 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
2842
2843 return s;
2844}
2845
2846
69c68955
FCE
2847expr_statement*
2848parser::parse_expr_statement ()
2849{
2850 expr_statement *es = new expr_statement;
2851 const token* t = peek ();
5e58d11c 2852 if (t == NULL)
f0454224 2853 throw PARSE_ERROR (_("expression statement expected"));
731a5359
MW
2854 // Copy, we only peeked, parse_expression might swallow.
2855 es->tok = new token (*t);
69c68955
FCE
2856 es->value = parse_expression ();
2857 return es;
2858}
2859
2860
56099f08
FCE
2861return_statement*
2862parser::parse_return_statement ()
2863{
2864 const token* t = next ();
6e213f58 2865 if (! (t->type == tok_keyword && t->content == "return"))
f0454224 2866 throw PARSE_ERROR (_("expected 'return'"));
6e213f58 2867 if (context != con_function)
f0454224 2868 throw PARSE_ERROR (_("found 'return' not in function context"));
56099f08
FCE
2869 return_statement* s = new return_statement;
2870 s->tok = t;
93ceea00
YZ
2871
2872 t = peek ();
2873 if (t->type == tok_operator && (t->content == ";" || t->content == "}"))
2874 s->value = NULL; // no return value
2875 else
2876 s->value = parse_expression ();
56099f08
FCE
2877 return s;
2878}
2879
2880
2881delete_statement*
2882parser::parse_delete_statement ()
2883{
2884 const token* t = next ();
6e213f58 2885 if (! (t->type == tok_keyword && t->content == "delete"))
f0454224 2886 throw PARSE_ERROR (_("expected 'delete'"));
56099f08
FCE
2887 delete_statement* s = new delete_statement;
2888 s->tok = t;
2889 s->value = parse_expression ();
2890 return s;
2891}
2892
2893
f3c26ea5
FCE
2894next_statement*
2895parser::parse_next_statement ()
2896{
2897 const token* t = next ();
6e213f58 2898 if (! (t->type == tok_keyword && t->content == "next"))
f0454224 2899 throw PARSE_ERROR (_("expected 'next'"));
f3c26ea5
FCE
2900 next_statement* s = new next_statement;
2901 s->tok = t;
2902 return s;
2903}
2904
2905
2906break_statement*
2907parser::parse_break_statement ()
2908{
2909 const token* t = next ();
6e213f58 2910 if (! (t->type == tok_keyword && t->content == "break"))
f0454224 2911 throw PARSE_ERROR (_("expected 'break'"));
f3c26ea5
FCE
2912 break_statement* s = new break_statement;
2913 s->tok = t;
2914 return s;
2915}
2916
2917
2918continue_statement*
2919parser::parse_continue_statement ()
2920{
2921 const token* t = next ();
6e213f58 2922 if (! (t->type == tok_keyword && t->content == "continue"))
f0454224 2923 throw PARSE_ERROR (_("expected 'continue'"));
f3c26ea5
FCE
2924 continue_statement* s = new continue_statement;
2925 s->tok = t;
2926 return s;
2927}
2928
2929
69c68955
FCE
2930for_loop*
2931parser::parse_for_loop ()
2932{
f3c26ea5 2933 const token* t = next ();
6e213f58 2934 if (! (t->type == tok_keyword && t->content == "for"))
f0454224 2935 throw PARSE_ERROR (_("expected 'for'"));
f3c26ea5
FCE
2936 for_loop* s = new for_loop;
2937 s->tok = t;
2938
2939 t = next ();
2940 if (! (t->type == tok_operator && t->content == "("))
f0454224 2941 throw PARSE_ERROR (_("expected '('"));
731a5359 2942 swallow ();
f3c26ea5
FCE
2943
2944 // initializer + ";"
2945 t = peek ();
2946 if (t && t->type == tok_operator && t->content == ";")
2947 {
cbfbbf69 2948 s->init = 0;
731a5359 2949 swallow ();
f3c26ea5
FCE
2950 }
2951 else
2952 {
2953 s->init = parse_expr_statement ();
2954 t = next ();
2955 if (! (t->type == tok_operator && t->content == ";"))
f0454224 2956 throw PARSE_ERROR (_("expected ';'"));
731a5359 2957 swallow ();
f3c26ea5
FCE
2958 }
2959
2960 // condition + ";"
2961 t = peek ();
2962 if (t && t->type == tok_operator && t->content == ";")
2963 {
2964 literal_number* l = new literal_number(1);
2965 s->cond = l;
2966 s->cond->tok = next ();
2967 }
2968 else
2969 {
2970 s->cond = parse_expression ();
2971 t = next ();
2972 if (! (t->type == tok_operator && t->content == ";"))
f0454224 2973 throw PARSE_ERROR (_("expected ';'"));
731a5359 2974 swallow ();
f3c26ea5 2975 }
dff50e09 2976
f3c26ea5
FCE
2977 // increment + ")"
2978 t = peek ();
2979 if (t && t->type == tok_operator && t->content == ")")
2980 {
cbfbbf69 2981 s->incr = 0;
731a5359 2982 swallow ();
f3c26ea5
FCE
2983 }
2984 else
2985 {
2986 s->incr = parse_expr_statement ();
2987 t = next ();
2988 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2989 throw PARSE_ERROR (_("expected ')'"));
731a5359 2990 swallow ();
f3c26ea5
FCE
2991 }
2992
2993 // block
2994 s->block = parse_statement ();
2995
2996 return s;
2997}
2998
2999
3000for_loop*
3001parser::parse_while_loop ()
3002{
3003 const token* t = next ();
6e213f58 3004 if (! (t->type == tok_keyword && t->content == "while"))
f0454224 3005 throw PARSE_ERROR (_("expected 'while'"));
f3c26ea5
FCE
3006 for_loop* s = new for_loop;
3007 s->tok = t;
3008
3009 t = next ();
3010 if (! (t->type == tok_operator && t->content == "("))
f0454224 3011 throw PARSE_ERROR (_("expected '('"));
731a5359 3012 swallow ();
f3c26ea5
FCE
3013
3014 // dummy init and incr fields
cbfbbf69
FCE
3015 s->init = 0;
3016 s->incr = 0;
f3c26ea5
FCE
3017
3018 // condition
3019 s->cond = parse_expression ();
3020
f3c26ea5
FCE
3021 t = next ();
3022 if (! (t->type == tok_operator && t->content == ")"))
f0454224 3023 throw PARSE_ERROR (_("expected ')'"));
731a5359 3024 swallow ();
dff50e09 3025
f3c26ea5
FCE
3026 // block
3027 s->block = parse_statement ();
3028
3029 return s;
69c68955
FCE
3030}
3031
3032
3033foreach_loop*
3034parser::parse_foreach_loop ()
3035{
3036 const token* t = next ();
6e213f58 3037 if (! (t->type == tok_keyword && t->content == "foreach"))
f0454224 3038 throw PARSE_ERROR (_("expected 'foreach'"));
69c68955
FCE
3039 foreach_loop* s = new foreach_loop;
3040 s->tok = t;
93484556 3041 s->sort_direction = 0;
fd5689dc 3042 s->sort_aggr = sc_none;
c261711d 3043 s->value = NULL;
27f21e8c 3044 s->limit = NULL;
69c68955
FCE
3045
3046 t = next ();
3047 if (! (t->type == tok_operator && t->content == "("))
f0454224 3048 throw PARSE_ERROR (_("expected '('"));
731a5359 3049 swallow ();
69c68955 3050
c261711d
JS
3051 symbol* lookahead_sym = NULL;
3052 int lookahead_sort = 0;
3053
3054 t = peek ();
3055 if (t && t->type == tok_identifier)
3056 {
3057 next ();
3058 lookahead_sym = new symbol;
3059 lookahead_sym->tok = t;
a3e980f9 3060 lookahead_sym->name = t->content;
c261711d
JS
3061
3062 t = peek ();
3063 if (t && t->type == tok_operator &&
3064 (t->content == "+" || t->content == "-"))
3065 {
c261711d 3066 lookahead_sort = (t->content == "+") ? 1 : -1;
731a5359 3067 swallow ();
c261711d
JS
3068 }
3069
3070 t = peek ();
3071 if (t && t->type == tok_operator && t->content == "=")
3072 {
731a5359 3073 swallow ();
c261711d
JS
3074 s->value = lookahead_sym;
3075 if (lookahead_sort)
3076 {
3077 s->sort_direction = lookahead_sort;
3078 s->sort_column = 0;
3079 }
3080 lookahead_sym = NULL;
3081 }
3082 }
3083
69c68955
FCE
3084 // see also parse_array_in
3085
3086 bool parenthesized = false;
3087 t = peek ();
c261711d 3088 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
69c68955 3089 {
731a5359 3090 swallow ();
69c68955
FCE
3091 parenthesized = true;
3092 }
3093
c261711d
JS
3094 if (lookahead_sym)
3095 {
3096 s->indexes.push_back (lookahead_sym);
3097 if (lookahead_sort)
3098 {
3099 s->sort_direction = lookahead_sort;
3100 s->sort_column = 1;
3101 }
3102 lookahead_sym = NULL;
3103 }
3104 else while (1)
69c68955
FCE
3105 {
3106 t = next ();
3107 if (! (t->type == tok_identifier))
f0454224 3108 throw PARSE_ERROR (_("expected identifier"));
69c68955
FCE
3109 symbol* sym = new symbol;
3110 sym->tok = t;
a3e980f9 3111 sym->name = t->content;
69c68955
FCE
3112 s->indexes.push_back (sym);
3113
93484556
FCE
3114 t = peek ();
3115 if (t && t->type == tok_operator &&
3116 (t->content == "+" || t->content == "-"))
3117 {
3118 if (s->sort_direction)
f0454224 3119 throw PARSE_ERROR (_("multiple sort directives"));
93484556
FCE
3120 s->sort_direction = (t->content == "+") ? 1 : -1;
3121 s->sort_column = s->indexes.size();
731a5359 3122 swallow ();
93484556
FCE
3123 }
3124
69c68955
FCE
3125 if (parenthesized)
3126 {
93484556 3127 t = peek ();
69c68955
FCE
3128 if (t && t->type == tok_operator && t->content == ",")
3129 {
731a5359 3130 swallow ();
69c68955
FCE
3131 continue;
3132 }
3133 else if (t && t->type == tok_operator && t->content == "]")
3134 {
731a5359 3135 swallow ();
69c68955
FCE
3136 break;
3137 }
dff50e09 3138 else
f0454224 3139 throw PARSE_ERROR (_("expected ',' or ']'"));
69c68955
FCE
3140 }
3141 else
3142 break; // expecting only one expression
3143 }
3144
3145 t = next ();
6e213f58 3146 if (! (t->type == tok_keyword && t->content == "in"))
f0454224 3147 throw PARSE_ERROR (_("expected 'in'"));
731a5359 3148 swallow ();
dff50e09 3149
d02548c0 3150 s->base = parse_indexable();
69c68955 3151
3040bf3a
AJ
3152 // check if there was an array slice that was specified
3153 t = peek();
3154 if (t && t->type == tok_operator && t->content == "[")
3155 {
3156 swallow();
3157 while (1)
3158 {
3159 t = peek();
3040bf3a
AJ
3160 if (t && t->type == tok_operator && t->content == "*")
3161 {
45af9d1b
AJ
3162 swallow();
3163 s->array_slice.push_back (NULL);
3040bf3a
AJ
3164 }
3165 else
45af9d1b 3166 s->array_slice.push_back (parse_expression());
3040bf3a
AJ
3167
3168 t = peek ();
3169 if (t && t->type == tok_operator && t->content == ",")
3170 {
3171 swallow ();
3172 continue;
3173 }
3174 else if (t && t->type == tok_operator && t->content == "]")
3175 {
3176 swallow ();
3177 break;
3178 }
3179 else
3180 throw PARSE_ERROR (_("expected ',' or ']'"));
3181 }
3182 }
3183
3184
fd5689dc
FCE
3185 // check for atword, see also expect_ident_or_atword,
3186 t = peek ();
3187 if (t && t->type == tok_operator && t->content[0] == '@')
3188 {
3189 if (t->content == "@avg") s->sort_aggr = sc_average;
3190 else if (t->content == "@min") s->sort_aggr = sc_min;
3191 else if (t->content == "@max") s->sort_aggr = sc_max;
3192 else if (t->content == "@count") s->sort_aggr = sc_count;
3193 else if (t->content == "@sum") s->sort_aggr = sc_sum;
63ead7fa 3194 else if (t->content == "@variance") s->sort_aggr = sc_variance;
f0454224 3195 else throw PARSE_ERROR(_("expected statistical operation"));
fd5689dc
FCE
3196 swallow();
3197
3198 t = peek ();
3199 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
f0454224 3200 throw PARSE_ERROR(_("expected sort directive"));
fd5689dc
FCE
3201 }
3202
93484556
FCE
3203 t = peek ();
3204 if (t && t->type == tok_operator &&
3205 (t->content == "+" || t->content == "-"))
3206 {
3207 if (s->sort_direction)
f0454224 3208 throw PARSE_ERROR (_("multiple sort directives"));
93484556
FCE
3209 s->sort_direction = (t->content == "+") ? 1 : -1;
3210 s->sort_column = 0;
731a5359 3211 swallow ();
93484556
FCE
3212 }
3213
27f21e8c
DS
3214 t = peek ();
3215 if (tok_is(t, tok_keyword, "limit"))
3216 {
731a5359 3217 swallow (); // get past the "limit"
27f21e8c
DS
3218 s->limit = parse_expression ();
3219 }
3220
69c68955
FCE
3221 t = next ();
3222 if (! (t->type == tok_operator && t->content == ")"))
f0454224 3223 throw PARSE_ERROR ("expected ')'");
731a5359 3224 swallow ();
69c68955
FCE
3225
3226 s->block = parse_statement ();
3227 return s;
3228}
3229
3230
2f1a1aea
FCE
3231expression*
3232parser::parse_expression ()
3233{
3234 return parse_assignment ();
3235}
3236
2f1a1aea
FCE
3237
3238expression*
3239parser::parse_assignment ()
3240{
3241 expression* op1 = parse_ternary ();
3242
3243 const token* t = peek ();
82919855 3244 // right-associative operators
dff50e09 3245 if (t && t->type == tok_operator
2f1a1aea 3246 && (t->content == "=" ||
82919855 3247 t->content == "<<<" ||
2f1a1aea 3248 t->content == "+=" ||
bb2e3076
FCE
3249 t->content == "-=" ||
3250 t->content == "*=" ||
3251 t->content == "/=" ||
3252 t->content == "%=" ||
3253 t->content == "<<=" ||
3254 t->content == ">>=" ||
3255 t->content == "&=" ||
3256 t->content == "^=" ||
3257 t->content == "|=" ||
d5d7c2cc 3258 t->content == ".=" ||
dff50e09 3259 false))
2f1a1aea 3260 {
bb2e3076 3261 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 3262 assignment* e = new assignment;
56099f08 3263 e->left = op1;
47d349b1 3264 e->op = t->content;
56099f08 3265 e->tok = t;
2f1a1aea 3266 next ();
82919855 3267 e->right = parse_expression ();
56099f08 3268 op1 = e;
2f1a1aea 3269 }
56099f08
FCE
3270
3271 return op1;
2f1a1aea
FCE
3272}
3273
3274
3275expression*
3276parser::parse_ternary ()
3277{
3278 expression* op1 = parse_logical_or ();
3279
3280 const token* t = peek ();
3281 if (t && t->type == tok_operator && t->content == "?")
3282 {
2f1a1aea 3283 ternary_expression* e = new ternary_expression;
56099f08 3284 e->tok = t;
2f1a1aea 3285 e->cond = op1;
56099f08
FCE
3286 next ();
3287 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
3288
3289 t = next ();
3290 if (! (t->type == tok_operator && t->content == ":"))
f0454224 3291 throw PARSE_ERROR (_("expected ':'"));
731a5359 3292 swallow ();
2f1a1aea 3293
b1144e77
YZ
3294 if (input.has_version("4.0"))
3295 e->falsevalue = parse_ternary ();
3296 else
3297 e->falsevalue = parse_expression ();
2f1a1aea
FCE
3298 return e;
3299 }
3300 else
3301 return op1;
3302}
3303
3304
3305expression*
3306parser::parse_logical_or ()
3307{
3308 expression* op1 = parse_logical_and ();
dff50e09 3309
2f1a1aea 3310 const token* t = peek ();
56099f08 3311 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 3312 {
2f1a1aea 3313 logical_or_expr* e = new logical_or_expr;
56099f08 3314 e->tok = t;
47d349b1 3315 e->op = t->content;
2f1a1aea 3316 e->left = op1;
56099f08
FCE
3317 next ();
3318 e->right = parse_logical_and ();
3319 op1 = e;
3320 t = peek ();
2f1a1aea 3321 }
56099f08
FCE
3322
3323 return op1;
2f1a1aea
FCE
3324}
3325
3326
3327expression*
3328parser::parse_logical_and ()
3329{
bb2e3076 3330 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
3331
3332 const token* t = peek ();
56099f08 3333 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 3334 {
2f1a1aea
FCE
3335 logical_and_expr *e = new logical_and_expr;
3336 e->left = op1;
47d349b1 3337 e->op = t->content;
56099f08
FCE
3338 e->tok = t;
3339 next ();
bb2e3076
FCE
3340 e->right = parse_boolean_or ();
3341 op1 = e;
3342 t = peek ();
3343 }
3344
3345 return op1;
3346}
3347
3348
3349expression*
3350parser::parse_boolean_or ()
3351{
3352 expression* op1 = parse_boolean_xor ();
3353
3354 const token* t = peek ();
3355 while (t && t->type == tok_operator && t->content == "|")
3356 {
3357 binary_expression* e = new binary_expression;
3358 e->left = op1;
47d349b1 3359 e->op = t->content;
bb2e3076
FCE
3360 e->tok = t;
3361 next ();
3362 e->right = parse_boolean_xor ();
3363 op1 = e;
3364 t = peek ();
3365 }
3366
3367 return op1;
3368}
3369
3370
3371expression*
3372parser::parse_boolean_xor ()
3373{
3374 expression* op1 = parse_boolean_and ();
3375
3376 const token* t = peek ();
3377 while (t && t->type == tok_operator && t->content == "^")
3378 {
3379 binary_expression* e = new binary_expression;
3380 e->left = op1;
47d349b1 3381 e->op = t->content;
bb2e3076
FCE
3382 e->tok = t;
3383 next ();
3384 e->right = parse_boolean_and ();
3385 op1 = e;
3386 t = peek ();
3387 }
3388
3389 return op1;
3390}
3391
3392
3393expression*
3394parser::parse_boolean_and ()
3395{
3396 expression* op1 = parse_array_in ();
3397
3398 const token* t = peek ();
3399 while (t && t->type == tok_operator && t->content == "&")
3400 {
3401 binary_expression* e = new binary_expression;
3402 e->left = op1;
47d349b1 3403 e->op = t->content;
bb2e3076
FCE
3404 e->tok = t;
3405 next ();
56099f08
FCE
3406 e->right = parse_array_in ();
3407 op1 = e;
3408 t = peek ();
2f1a1aea 3409 }
56099f08
FCE
3410
3411 return op1;
2f1a1aea
FCE
3412}
3413
3414
3415expression*
3416parser::parse_array_in ()
3417{
ce10591c 3418 // This is a very tricky case. All these are legit expressions:
69c68955 3419 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
3420 vector<expression*> indexes;
3421 bool parenthesized = false;
2f1a1aea
FCE
3422
3423 const token* t = peek ();
69c68955 3424 if (t && t->type == tok_operator && t->content == "[")
ce10591c 3425 {
731a5359 3426 swallow ();
ce10591c
FCE
3427 parenthesized = true;
3428 }
3429
3430 while (1)
3431 {
e225e273
AJ
3432 t = peek();
3433 if (t && t->type == tok_operator && t->content == "*" && parenthesized)
3434 {
45af9d1b
AJ
3435 swallow();
3436 indexes.push_back(NULL);
e225e273
AJ
3437 }
3438 else
3439 {
3440 expression* op1 = parse_comparison_or_regex_query ();
3441 indexes.push_back (op1);
3442 }
ce10591c
FCE
3443
3444 if (parenthesized)
3445 {
3446 const token* t = peek ();
3447 if (t && t->type == tok_operator && t->content == ",")
3448 {
731a5359 3449 swallow ();
ce10591c
FCE
3450 continue;
3451 }
69c68955 3452 else if (t && t->type == tok_operator && t->content == "]")
ce10591c 3453 {
731a5359 3454 swallow ();
ce10591c
FCE
3455 break;
3456 }
dff50e09 3457 else
f0454224 3458 throw PARSE_ERROR (_("expected ',' or ']'"));
ce10591c
FCE
3459 }
3460 else
3461 break; // expecting only one expression
3462 }
3463
3464 t = peek ();
6e213f58 3465 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 3466 {
2f1a1aea 3467 array_in *e = new array_in;
56099f08 3468 e->tok = t;
731a5359 3469 next ();
ce10591c
FCE
3470
3471 arrayindex* a = new arrayindex;
3472 a->indexes = indexes;
d02548c0 3473 a->base = parse_indexable();
d15d767c 3474 a->tok = a->base->tok;
ce10591c 3475 e->operand = a;
2f1a1aea
FCE
3476 return e;
3477 }
ce10591c
FCE
3478 else if (indexes.size() == 1) // no "in" - need one expression only
3479 return indexes[0];
2f1a1aea 3480 else
f0454224 3481 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
2f1a1aea
FCE
3482}
3483
3484
3485expression*
93daaca8 3486parser::parse_comparison_or_regex_query ()
2f1a1aea 3487{
bb2e3076 3488 expression* op1 = parse_shift ();
2f1a1aea 3489
557abe61 3490 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
93daaca8
SM
3491 const token *t = peek();
3492 if (t && t->type == tok_operator
3493 && (t->content == "=~" ||
3494 t->content == "!~"))
3495 {
3496 regex_query* r = new regex_query;
3497 r->left = op1;
47d349b1 3498 r->op = t->content;
93daaca8
SM
3499 r->tok = t;
3500 next ();
d3bc48f0 3501 r->right = parse_literal_string();
93daaca8
SM
3502 op1 = r;
3503 t = peek ();
3504 }
3505 else while (t && t->type == tok_operator
553d27a5
FCE
3506 && (t->content == ">" ||
3507 t->content == "<" ||
3508 t->content == "==" ||
3509 t->content == "!=" ||
3510 t->content == "<=" ||
bb2e3076 3511 t->content == ">="))
2f1a1aea
FCE
3512 {
3513 comparison* e = new comparison;
3514 e->left = op1;
47d349b1 3515 e->op = t->content;
56099f08 3516 e->tok = t;
2f1a1aea 3517 next ();
bb2e3076
FCE
3518 e->right = parse_shift ();
3519 op1 = e;
3520 t = peek ();
3521 }
3522
3523 return op1;
3524}
3525
3526
3527expression*
3528parser::parse_shift ()
3529{
3530 expression* op1 = parse_concatenation ();
3531
3532 const token* t = peek ();
dff50e09 3533 while (t && t->type == tok_operator &&
161f6f7b 3534 (t->content == "<<" || t->content == ">>" || t->content == ">>>"))
bb2e3076
FCE
3535 {
3536 binary_expression* e = new binary_expression;
3537 e->left = op1;
47d349b1 3538 e->op = t->content;
bb2e3076
FCE
3539 e->tok = t;
3540 next ();
56099f08
FCE
3541 e->right = parse_concatenation ();
3542 op1 = e;
3543 t = peek ();
2f1a1aea 3544 }
56099f08
FCE
3545
3546 return op1;
2f1a1aea
FCE
3547}
3548
3549
3550expression*
3551parser::parse_concatenation ()
3552{
3553 expression* op1 = parse_additive ();
3554
3555 const token* t = peek ();
3556 // XXX: the actual awk string-concatenation operator is *whitespace*.
3557 // I don't know how to easily to model that here.
56099f08 3558 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
3559 {
3560 concatenation* e = new concatenation;
3561 e->left = op1;
47d349b1 3562 e->op = t->content;
56099f08 3563 e->tok = t;
2f1a1aea 3564 next ();
56099f08
FCE
3565 e->right = parse_additive ();
3566 op1 = e;
3567 t = peek ();
2f1a1aea 3568 }
56099f08
FCE
3569
3570 return op1;
2f1a1aea
FCE
3571}
3572
3573
3574expression*
3575parser::parse_additive ()
3576{
3577 expression* op1 = parse_multiplicative ();
3578
3579 const token* t = peek ();
dff50e09 3580 while (t && t->type == tok_operator
2f1a1aea
FCE
3581 && (t->content == "+" || t->content == "-"))
3582 {
3583 binary_expression* e = new binary_expression;
47d349b1 3584 e->op = t->content;
2f1a1aea 3585 e->left = op1;
56099f08 3586 e->tok = t;
2f1a1aea 3587 next ();
56099f08
FCE
3588 e->right = parse_multiplicative ();
3589 op1 = e;
3590 t = peek ();
2f1a1aea 3591 }
56099f08
FCE
3592
3593 return op1;
2f1a1aea
FCE
3594}
3595
3596
3597expression*
3598parser::parse_multiplicative ()
3599{
3600 expression* op1 = parse_unary ();
3601
3602 const token* t = peek ();
dff50e09 3603 while (t && t->type == tok_operator
2f1a1aea
FCE
3604 && (t->content == "*" || t->content == "/" || t->content == "%"))
3605 {
3606 binary_expression* e = new binary_expression;
47d349b1 3607 e->op = t->content;
2f1a1aea 3608 e->left = op1;
56099f08 3609 e->tok = t;
2f1a1aea 3610 next ();
56099f08
FCE
3611 e->right = parse_unary ();
3612 op1 = e;
3613 t = peek ();
2f1a1aea 3614 }
56099f08
FCE
3615
3616 return op1;
2f1a1aea
FCE
3617}
3618
3619
3620expression*
3621parser::parse_unary ()
3622{
3623 const token* t = peek ();
dff50e09
FCE
3624 if (t && t->type == tok_operator
3625 && (t->content == "+" ||
3626 t->content == "-" ||
bb2e3076
FCE
3627 t->content == "!" ||
3628 t->content == "~" ||
3629 false))
2f1a1aea
FCE
3630 {
3631 unary_expression* e = new unary_expression;
47d349b1 3632 e->op = t->content;
56099f08 3633 e->tok = t;
2f1a1aea 3634 next ();
1cb79a72 3635 e->operand = parse_unary ();
2f1a1aea
FCE
3636 return e;
3637 }
3638 else
bb2e3076 3639 return parse_crement ();
2f1a1aea
FCE
3640}
3641
3642
3643expression*
3644parser::parse_crement () // as in "increment" / "decrement"
3645{
cbfbbf69
FCE
3646 // NB: Ideally, we'd parse only a symbol as an operand to the
3647 // *crement operators, instead of a general expression value. We'd
3648 // need more complex lookahead code to tell apart the postfix cases.
3649 // So we just punt, and leave it to pass-3 to signal errors on
3650 // cases like "4++".
3651
2f1a1aea 3652 const token* t = peek ();
dff50e09 3653 if (t && t->type == tok_operator
2f1a1aea
FCE
3654 && (t->content == "++" || t->content == "--"))
3655 {
3656 pre_crement* e = new pre_crement;
47d349b1 3657 e->op = t->content;
56099f08 3658 e->tok = t;
2f1a1aea 3659 next ();
0fb0cac9 3660 e->operand = parse_dwarf_value ();
2f1a1aea
FCE
3661 return e;
3662 }
3663
3664 // post-crement or non-crement
0fb0cac9 3665 expression *op1 = parse_dwarf_value ();
dff50e09 3666
2f1a1aea 3667 t = peek ();
dff50e09 3668 if (t && t->type == tok_operator
2f1a1aea
FCE
3669 && (t->content == "++" || t->content == "--"))
3670 {
3671 post_crement* e = new post_crement;
47d349b1 3672 e->op = t->content;
56099f08 3673 e->tok = t;
2f1a1aea
FCE
3674 next ();
3675 e->operand = op1;
3676 return e;
3677 }
3678 else
3679 return op1;
3680}
3681
3682
0fb0cac9
JS
3683expression*
3684parser::parse_dwarf_value ()
3685{
3686 expression* expr = NULL;
3687 target_symbol* tsym = NULL;
3688
3689 // With '&' we'll definitely be making a target symbol of some sort
251707c8
JS
3690 const token* addrtok = peek_op ("&") ? next () : NULL;
3691 bool addressof = (addrtok != NULL);
0fb0cac9
JS
3692
3693 // First try target_symbol types: $var, @cast, and @var.
0fb0cac9
JS
3694 const token* t = peek ();
3695 if (t && t->type == tok_identifier && t->content[0] == '$')
3696 expr = tsym = parse_target_symbol ();
3697 else if (tok_is (t, tok_operator, "@cast"))
3698 expr = tsym = parse_cast_op ();
3699 else if (tok_is (t, tok_operator, "@var"))
3700 expr = tsym = parse_atvar_op ();
f8405ea5 3701 else if (addressof && !input.has_version("2.6"))
eff66d40
JS
3702 // '&' on old version only allowed specific target_symbol types
3703 throw PARSE_ERROR (_("expected @cast, @var or $var"));
0fb0cac9 3704 else
4e83b857
YZ
3705 {
3706 // Otherwise just get a plain value of any sort.
3707 expr = parse_value ();
3708 if (addressof)
3709 {
3710 tsym = dynamic_cast<target_symbol*> (expr);
3711 if (tsym && tsym->addressof)
3712 throw PARSE_ERROR (_("cannot take address more than once"),
3713 addrtok);
3714 }
3715 }
0fb0cac9
JS
3716
3717 // If we had '&' or see any target suffixes, that forces a target_symbol.
eff66d40
JS
3718 // For compatibility, we only do this starting with 2.6.
3719 if (!tsym && (addressof || peek_target_symbol_components ())
f8405ea5 3720 && input.has_version("2.6"))
0fb0cac9 3721 {
251707c8
JS
3722 autocast_op *cop = new autocast_op;
3723 cop->tok = addrtok ?: peek ();
0fb0cac9
JS
3724 cop->operand = expr;
3725 expr = tsym = cop;
3726 }
3727
3728 if (tsym)
3729 {
3730 // Parse the rest of any kind of target symbol
3731 tsym->addressof = addressof;
3732 parse_target_symbol_components (tsym);
3733 }
3734
3735 return expr;
3736}
3737
3738
2f1a1aea
FCE
3739expression*
3740parser::parse_value ()
3741{
3742 const token* t = peek ();
3743 if (! t)
f0454224 3744 throw PARSE_ERROR (_("expected value"));
2f1a1aea 3745
7d902887
FCE
3746 if (t->type == tok_embedded)
3747 {
7d902887 3748 if (! privileged)
f0454224 3749 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
7d902887
FCE
3750
3751 embedded_expr *e = new embedded_expr;
3752 e->tok = t;
47d349b1 3753 e->code = t->content;
731a5359 3754 next ();
7d902887
FCE
3755 return e;
3756 }
3757
2f1a1aea
FCE
3758 if (t->type == tok_operator && t->content == "(")
3759 {
731a5359 3760 swallow ();
2f1a1aea
FCE
3761 expression* e = parse_expression ();
3762 t = next ();
3763 if (! (t->type == tok_operator && t->content == ")"))
f0454224 3764 throw PARSE_ERROR (_("expected ')'"));
731a5359 3765 swallow ();
2f1a1aea
FCE
3766 return e;
3767 }
06219d6f
SM
3768 else if (t->type == tok_identifier
3769 || (t->type == tok_operator && t->content[0] == '@'))
2f1a1aea
FCE
3770 return parse_symbol ();
3771 else
3772 return parse_literal ();
3773}
3774
3775
d02548c0 3776const token *
b1f2b0e8 3777parser::parse_hist_op_or_bare_name (hist_op *&hop, interned_string &name)
d02548c0
GH
3778{
3779 hop = NULL;
50cc7cd5 3780 const token* t = expect_ident_or_atword (name);
d02548c0
GH
3781 if (name == "@hist_linear" || name == "@hist_log")
3782 {
3783 hop = new hist_op;
3784 if (name == "@hist_linear")
3785 hop->htype = hist_linear;
3786 else if (name == "@hist_log")
3787 hop->htype = hist_log;
3788 hop->tok = t;
3789 expect_op("(");
3790 hop->stat = parse_expression ();
3791 int64_t tnum;
3792 if (hop->htype == hist_linear)
3793 {
3794 for (size_t i = 0; i < 3; ++i)
3795 {
3796 expect_op (",");
3797 expect_number (tnum);
3798 hop->params.push_back (tnum);
3799 }
3800 }
d02548c0
GH
3801 expect_op(")");
3802 }
3803 return t;
3804}
3805
3806
3807indexable*
3808parser::parse_indexable ()
3809{
3810 hist_op *hop = NULL;
b1f2b0e8 3811 interned_string name;
d02548c0
GH
3812 const token *tok = parse_hist_op_or_bare_name(hop, name);
3813 if (hop)
3814 return hop;
3815 else
3816 {
3817 symbol* sym = new symbol;
3818 sym->name = name;
3819 sym->tok = tok;
3820 return sym;
3821 }
3822}
3823
3824
0fb0cac9
JS
3825// var, indexable[index], func(parms), printf("...", ...),
3826// @defined, @entry, @stat_op(stat)
30263a73 3827expression* parser::parse_symbol ()
2f1a1aea 3828{
d02548c0
GH
3829 hist_op *hop = NULL;
3830 symbol *sym = NULL;
b1f2b0e8 3831 interned_string name;
63ead7fa 3832 unsigned max_params = 0;
d02548c0
GH
3833 const token *t = parse_hist_op_or_bare_name(hop, name);
3834
3835 if (!hop)
0fefb486 3836 {
dff50e09 3837 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0 3838 // now scrutinize this identifier for the various magic forms of identifier
0fb0cac9 3839 // (printf, @stat_op...)
9b5af295 3840
db135493
FCE
3841 // NB: PR11343: @defined() is not incompatible with earlier versions
3842 // of stap, so no need to check session.compatible for 1.2
30263a73
FCE
3843 if (name == "@defined")
3844 return parse_defined_op (t);
8cc799a5 3845
0a7eb12d
MC
3846 if (name == "@const")
3847 return parse_const_op (t);
3848
8cc799a5
JS
3849 if (name == "@entry")
3850 return parse_entry_op (t);
3851
3689db05
SC
3852 if (name == "@perf")
3853 return parse_perf_op (t);
3854
4c2e691d
JU
3855 if (input.has_version("4.0"))
3856 {
3857 if (name == "@kregister" || name == "@uregister")
3858 return parse_target_register (t);
3859
3860 if (name == "@kderef" || name == "@uderef")
3861 return parse_target_deref (t);
3862 }
3863
cc9001af 3864 if (name.size() > 0 && name[0] == '@')
d7f3e0c5 3865 {
d02548c0
GH
3866 stat_op *sop = new stat_op;
3867 if (name == "@avg")
3868 sop->ctype = sc_average;
63ead7fa
MC
3869 else if (name == "@variance")
3870 sop->ctype = sc_variance, max_params = 1;
d02548c0
GH
3871 else if (name == "@count")
3872 sop->ctype = sc_count;
3873 else if (name == "@sum")
3874 sop->ctype = sc_sum;
3875 else if (name == "@min")
3876 sop->ctype = sc_min;
3877 else if (name == "@max")
3878 sop->ctype = sc_max;
3879 else
b1f2b0e8
JS
3880 throw PARSE_ERROR(_F("unknown operator %s",
3881 name.to_string().c_str()));
d02548c0
GH
3882 expect_op("(");
3883 sop->tok = t;
3884 sop->stat = parse_expression ();
63ead7fa
MC
3885
3886 while(1)
3887 {
3888 t = next ();
3889 if (t && t->type == tok_operator && t->content == ")")
3890 {
3891 swallow ();
3892 break;
3893 }
3894 else if (t && t->type == tok_operator && t->content == ",")
3895 {
3896 if (sop->params.size() >= max_params)
3897 throw PARSE_ERROR(_NF("not more than %d parameter allowed",
3898 "not more than %d parameters allowed",
3899 max_params+1, max_params+1), t);
3900
3901 swallow ();
3902 int64_t tnum;
3903 expect_number (tnum);
3904 sop->params.push_back (tnum);
3905 }
3906 }
d02548c0
GH
3907 return sop;
3908 }
dff50e09 3909
d5e178c1 3910 else if (print_format *fmt = print_format::create(t))
d02548c0 3911 {
d02548c0 3912 expect_op("(");
b15c465c
PP
3913 if ((name == "print" || name == "println" ||
3914 name == "sprint" || name == "sprintln") &&
f34254da 3915 (peek_op("@hist_linear") || peek_op("@hist_log")))
a4636912
GH
3916 {
3917 // We have a special case where we recognize
3918 // print(@hist_foo(bar)) as a magic print-the-histogram
3919 // construct. This is sort of gross but it avoids
3920 // promoting histogram references to typeful
3921 // expressions.
dff50e09 3922
1bbeef03
GH
3923 hop = NULL;
3924 t = parse_hist_op_or_bare_name(hop, name);
3925 assert(hop);
dff50e09 3926
1bbeef03
GH
3927 // It is, sadly, possible that even while parsing a
3928 // hist_op, we *mis-guessed* and the user wishes to
3929 // print(@hist_op(foo)[bucket]), a scalar. In that case
3930 // we must parse the arrayindex and print an expression.
839325a1
JS
3931 //
3932 // XXX: This still fails if the arrayindex is part of a
3933 // larger expression. To really handle everything, we'd
3934 // need to push back all the hist tokens start over.
dff50e09 3935
1bbeef03
GH
3936 if (!peek_op ("["))
3937 fmt->hist = hop;
3938 else
3939 {
3940 // This is simplified version of the
3941 // multi-array-index parser below, because we can
3942 // only ever have one index on a histogram anyways.
3943 expect_op("[");
3944 struct arrayindex* ai = new arrayindex;
3945 ai->tok = t;
3946 ai->base = hop;
3947 ai->indexes.push_back (parse_expression ());
3948 expect_op("]");
3949 fmt->args.push_back(ai);
839325a1
JS
3950
3951 // Consume any subsequent arguments.
3ad8aabf 3952 while (!peek_op(")"))
839325a1 3953 {
3ad8aabf
FL
3954 // ')' is not possible here but we want to output a nicer
3955 // parser error message.
3956 (void) expect_op_any ({",", ")"});
839325a1
JS
3957 expression *e = parse_expression ();
3958 fmt->args.push_back(e);
3959 }
1bbeef03 3960 }
a4636912 3961 }
d7f3e0c5 3962 else
d02548c0 3963 {
3cb17058 3964 int min_args = 0;
80cb29eb 3965 bool consumed_arg = false;
3cb17058
JS
3966 if (fmt->print_with_format)
3967 {
3968 // Consume and convert a format string. Agreement between the
3969 // format string and the arguments is postponed to the
3970 // typechecking phase.
c92d3b42 3971 literal_string* ls = parse_literal_string();
47d349b1 3972 fmt->raw_components = ls->value;
c92d3b42
FCE
3973 delete ls;
3974 fmt->components = print_format::string_to_components (fmt->raw_components);
80cb29eb 3975 consumed_arg = true;
3cb17058
JS
3976 }
3977 else if (fmt->print_with_delim)
3978 {
3979 // Consume a delimiter to separate arguments.
c92d3b42 3980 literal_string* ls = parse_literal_string();
d70e3afe 3981 fmt->delimiter = ls->value;
c92d3b42 3982 delete ls;
80cb29eb
JL
3983 consumed_arg = true;
3984 min_args = 2; // so that the delim is used at least once
3cb17058 3985 }
80cb29eb 3986 else if (!fmt->print_with_newline)
3cb17058 3987 {
80cb29eb
JL
3988 // If we are not printing with a format string, nor with a
3989 // delim, nor with a newline, then it's either print() or
3990 // sprint(), both of which require at least one argument (of
3991 // any type).
3992 min_args = 1;
3cb17058
JS
3993 }
3994
3995 // Consume any subsequent arguments.
3996 while (min_args || !peek_op (")"))
3997 {
f0a49a41
FL
3998 // ')' is not possible here but we want to output a nicer
3999 // parser error message.
80cb29eb 4000 if (consumed_arg)
1efdc9a9 4001 (void) expect_op_any({",", ")"});
3cb17058
JS
4002 expression *e = parse_expression ();
4003 fmt->args.push_back(e);
80cb29eb 4004 consumed_arg = true;
3cb17058
JS
4005 if (min_args)
4006 --min_args;
4007 }
d02548c0 4008 }
3ad8aabf 4009 expect_op(")");
d02548c0
GH
4010 return fmt;
4011 }
dff50e09 4012
d02548c0
GH
4013 else if (peek_op ("(")) // function call
4014 {
731a5359 4015 swallow ();
d02548c0
GH
4016 struct functioncall* f = new functioncall;
4017 f->tok = t;
4018 f->function = name;
4019 // Allow empty actual parameter list
4020 if (peek_op (")"))
4021 {
731a5359 4022 swallow ();
d02548c0
GH
4023 return f;
4024 }
4025 while (1)
4026 {
4027 f->args.push_back (parse_expression ());
1efdc9a9
FL
4028 interned_string op = expect_op_any({")", ","});
4029 if (op == ")")
4030 break;
4031 else if (op == ",")
4032 continue;
d02548c0
GH
4033 }
4034 return f;
4035 }
4036
4037 else
4038 {
4039 sym = new symbol;
4040 sym->name = name;
4041 sym->tok = t;
d7f3e0c5 4042 }
0fefb486 4043 }
dff50e09
FCE
4044
4045 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
4046 // we had a plain word and it was converted to a symbol.
4047
70c743d8 4048 assert (!hop != !sym); // logical XOR
d02548c0
GH
4049
4050 // All that remains is to check for array indexing
4051
d7f3e0c5 4052 if (peek_op ("[")) // array
2f1a1aea 4053 {
731a5359 4054 swallow ();
2f1a1aea 4055 struct arrayindex* ai = new arrayindex;
d02548c0
GH
4056 ai->tok = t;
4057
4058 if (hop)
4059 ai->base = hop;
4060 else
4061 ai->base = sym;
4062
2f1a1aea
FCE
4063 while (1)
4064 {
a98c930b
AJ
4065 if (peek_op("*"))
4066 {
45af9d1b
AJ
4067 swallow();
4068 ai->indexes.push_back (NULL);
a98c930b
AJ
4069 }
4070 else
4071 ai->indexes.push_back (parse_expression ());
1efdc9a9
FL
4072 interned_string op = expect_op_any({"]", ","});
4073 if (op == "]")
4074 break;
616c2df5 4075 else if (op == ",")
1efdc9a9 4076 continue;
2f1a1aea 4077 }
0fb0cac9 4078
2f1a1aea
FCE
4079 return ai;
4080 }
d02548c0
GH
4081
4082 // If we got to here, we *should* have a symbol; if we have
4083 // a hist_op on its own, it doesn't count as an expression,
4084 // so we throw a parse error.
4085
4086 if (hop)
f0454224 4087 throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
dff50e09
FCE
4088
4089 return sym;
2f1a1aea 4090}
56099f08 4091
0fb0cac9
JS
4092// Parse a $var.
4093target_symbol* parser::parse_target_symbol ()
30263a73 4094{
0fb0cac9
JS
4095 const token* t = next ();
4096 if (t->type == tok_identifier && t->content[0]=='$')
d48afc20 4097 {
0fb0cac9
JS
4098 // target_symbol time
4099 target_symbol *tsym = new target_symbol;
4100 tsym->tok = t;
a3e980f9 4101 tsym->name = t->content;
0fb0cac9 4102 return tsym;
d48afc20
JS
4103 }
4104
0fb0cac9
JS
4105 throw PARSE_ERROR (_("expected $var"));
4106}
4107
4108
4109// Parse a @cast.
4110cast_op* parser::parse_cast_op ()
4111{
4112 const token* t = next ();
06219d6f 4113 if (t->type == tok_operator && t->content == "@cast")
30263a73
FCE
4114 {
4115 cast_op *cop = new cast_op;
4116 cop->tok = t;
a3e980f9 4117 cop->name = t->content;
30263a73
FCE
4118 expect_op("(");
4119 cop->operand = parse_expression ();
4120 expect_op(",");
7f6b80bd 4121 expect_unknown(tok_string, cop->type_name);
0fb0cac9
JS
4122 if (cop->type_name.empty())
4123 throw PARSE_ERROR (_("expected non-empty string"));
30263a73
FCE
4124 if (peek_op (","))
4125 {
731a5359 4126 swallow ();
30263a73
FCE
4127 expect_unknown(tok_string, cop->module);
4128 }
4129 expect_op(")");
30263a73
FCE
4130 return cop;
4131 }
4132
0fb0cac9
JS
4133 throw PARSE_ERROR (_("expected @cast"));
4134}
4135
30263a73 4136
0fb0cac9
JS
4137// Parse a @var.
4138atvar_op* parser::parse_atvar_op ()
4139{
4140 const token* t = next ();
06219d6f 4141 if (t->type == tok_operator && t->content == "@var")
cc9001af 4142 {
bd1fcbad
YZ
4143 atvar_op *aop = new atvar_op;
4144 aop->tok = t;
a3e980f9 4145 aop->name = t->content;
cc9001af 4146 expect_op("(");
bd1fcbad
YZ
4147 expect_unknown(tok_string, aop->target_name);
4148 size_t found_at = aop->target_name.find("@");
bfa7e523 4149 if (found_at != string::npos)
bd1fcbad 4150 aop->cu_name = aop->target_name.substr(found_at + 1);
bfa7e523 4151 else
bd1fcbad
YZ
4152 aop->cu_name = "";
4153 if (peek_op (","))
4154 {
4155 swallow ();
4156 expect_unknown (tok_string, aop->module);
4157 }
4158 else
4159 aop->module = "";
cc9001af 4160 expect_op(")");
bd1fcbad 4161 return aop;
cc9001af
MW
4162 }
4163
0fb0cac9 4164 throw PARSE_ERROR (_("expected @var"));
30263a73
FCE
4165}
4166
4167
4168// Parse a @defined(). Given head token has already been consumed.
4169expression* parser::parse_defined_op (const token* t)
4170{
4171 defined_op* dop = new defined_op;
4172 dop->tok = t;
4173 expect_op("(");
0fb0cac9 4174 dop->operand = parse_expression ();
30263a73
FCE
4175 expect_op(")");
4176 return dop;
4177}
4178
4179
0a7eb12d
MC
4180// Parse a @const(). Given head token has already been consumed.
4181expression* parser::parse_const_op (const token* t)
4182{
4183 if (! privileged)
4184 throw PARSE_ERROR (_("using @const operator not permitted; need stap -g"),
4185 false /* don't skip tokens for parse resumption */);
4186
a7b0fd27
MC
4187 interned_string cnst;
4188 embedded_expr *ee = new embedded_expr;
4189 ee->tok = t;
0a7eb12d 4190 expect_op("(");
a7b0fd27
MC
4191 expect_unknown(tok_string, cnst);
4192 if(cnst.empty())
0a7eb12d
MC
4193 throw PARSE_ERROR (_("expected non-empty string"));
4194 expect_op(")");
a7b0fd27
MC
4195 ee->code = string("/* pure */ /* unprivileged */ /* stable */ ") + string(cnst);
4196 return ee;
0a7eb12d
MC
4197}
4198
4199
8cc799a5
JS
4200// Parse a @entry(). Given head token has already been consumed.
4201expression* parser::parse_entry_op (const token* t)
4202{
4203 entry_op* eop = new entry_op;
4204 eop->tok = t;
4205 expect_op("(");
4206 eop->operand = parse_expression ();
4207 expect_op(")");
4208 return eop;
4209}
4210
4211
3689db05
SC
4212// Parse a @perf(). Given head token has already been consumed.
4213expression* parser::parse_perf_op (const token* t)
4214{
4215 perf_op* pop = new perf_op;
4216 pop->tok = t;
4217 expect_op("(");
ace7c23f
FCE
4218 pop->operand = parse_literal_string ();
4219 if (pop->operand->value == "")
f0454224 4220 throw PARSE_ERROR (_("expected non-empty string"));
3689db05
SC
4221 expect_op(")");
4222 return pop;
4223}
4224
4c2e691d
JU
4225// Parse a @kregister or @uregister. Given head token has already been consumed.
4226expression* parser::parse_target_register (const token* t)
4227{
4228 target_register *treg = new target_register;
4229 int64_t regno;
4230 treg->tok = t;
4231 treg->userspace_p = (t->content[1] == 'u');
c664daa9
JU
4232 if (! treg->userspace_p && ! privileged)
4233 throw PARSE_ERROR (_("using @kregister operator not permitted; need stap -g"),
4234 false /* don't skip tokens for parse resumption */);
4c2e691d
JU
4235 expect_op("(");
4236 expect_number(regno);
4237 treg->regno = regno;
4238 expect_op(")");
4239 return treg;
4240}
4241
4242// Parse a @kderef or @uderef. Given head token has already been consumed.
4243expression* parser::parse_target_deref (const token* t)
4244{
4245 target_deref *tderef = new target_deref;
4246 int64_t size;
4247 tderef->tok = t;
4248 tderef->userspace_p = (t->content[1] == 'u');
c664daa9
JU
4249 if (! tderef->userspace_p && ! privileged)
4250 throw PARSE_ERROR (_("using @kderef operator not permitted; need stap -g"),
4251 false /* don't skip tokens for parse resumption */);
4c2e691d
JU
4252 expect_op("(");
4253 expect_number(size);
4254 tderef->size = size;
4255 expect_op(",");
4256 tderef->addr = parse_expression();
4257 expect_op(")");
4258 return tderef;
4259}
3689db05 4260
0fb0cac9
JS
4261bool
4262parser::peek_target_symbol_components ()
4263{
3ddcf938
JS
4264 const token * t = peek ();
4265 return t &&
4266 ((t->type == tok_operator && (t->content == "->" || t->content == "["))
4267 || (t->type == tok_identifier &&
4268 t->content.find_first_not_of('$') == string::npos));
0fb0cac9 4269}
30263a73 4270
81931eab
JS
4271void
4272parser::parse_target_symbol_components (target_symbol* e)
4273{
5f36109e
JS
4274 bool pprint = false;
4275
4276 // check for pretty-print in the form $foo$
47d349b1 4277 string base = e->name;
5f36109e
JS
4278 size_t pprint_pos = base.find_last_not_of('$');
4279 if (0 < pprint_pos && pprint_pos < base.length() - 1)
4280 {
4281 string pprint_val = base.substr(pprint_pos + 1);
4282 base.erase(pprint_pos + 1);
47d349b1 4283 e->name = base;
5f36109e
JS
4284 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
4285 pprint = true;
4286 }
4287
4288 while (!pprint)
81931eab 4289 {
81931eab
JS
4290 if (peek_op ("->"))
4291 {
c67847a0 4292 const token* t = next();
b1f2b0e8 4293 interned_string member;
c67847a0 4294 expect_ident_or_keyword (member);
5f36109e
JS
4295
4296 // check for pretty-print in the form $foo->$ or $foo->bar$
4297 pprint_pos = member.find_last_not_of('$');
b1f2b0e8 4298 interned_string pprint_val;
5f36109e
JS
4299 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
4300 {
4301 pprint_val = member.substr(pprint_pos + 1);
b1f2b0e8 4302 member = member.substr(0, pprint_pos + 1);
5f36109e
JS
4303 pprint = true;
4304 }
4305
4306 if (!member.empty())
4307 e->components.push_back (target_symbol::component(t, member));
4308 if (pprint)
4309 e->components.push_back (target_symbol::component(t, pprint_val, true));
81931eab
JS
4310 }
4311 else if (peek_op ("["))
4312 {
c67847a0 4313 const token* t = next();
6fda2dff
JS
4314 expression* index = parse_expression();
4315 literal_number* ln = dynamic_cast<literal_number*>(index);
4316 if (ln)
4317 e->components.push_back (target_symbol::component(t, ln->value));
4318 else
4319 e->components.push_back (target_symbol::component(t, index));
81931eab 4320 expect_op ("]");
81931eab
JS
4321 }
4322 else
4323 break;
4324 }
5f36109e
JS
4325
4326 if (!pprint)
4327 {
4328 // check for pretty-print in the form $foo $
4329 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
4330 const token* t = peek();
3819d181 4331 if (t != NULL && t->type == tok_identifier &&
5f36109e
JS
4332 t->content.find_first_not_of('$') == string::npos)
4333 {
4334 t = next();
47d349b1 4335 e->components.push_back (target_symbol::component(t, t->content, true));
5f36109e
JS
4336 pprint = true;
4337 }
4338 }
4339
4340 if (pprint && (peek_op ("->") || peek_op("[")))
f0454224 4341 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
81931eab
JS
4342}
4343
73267b89 4344/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.842724 seconds and 5 git commands to generate.