]> sourceware.org Git - systemtap.git/blame - parse.cxx
update copyrights
[systemtap.git] / parse.cxx
CommitLineData
2f1a1aea 1// recursive descent parser for systemtap scripts
ef36f781 2// Copyright (C) 2005-2014 Red Hat Inc.
77a5c1f9 3// Copyright (C) 2006 Intel Corporation.
5811366a 4// Copyright (C) 2007 Bull S.A.S
92585d32 5// Copyright (C) 2014 Peter Kjellstrom <cap@nsc.liu.se>
69c68955
FCE
6//
7// This file is part of systemtap, and is free software. You can
8// redistribute it and/or modify it under the terms of the GNU General
9// Public License (GPL); either version 2, or (at your option) any
10// later version.
2f1a1aea 11
2b066ec1 12#include "config.h"
2f1a1aea
FCE
13#include "staptree.h"
14#include "parse.h"
177a8ead 15#include "session.h"
3f99432c
FCE
16#include "util.h"
17
2b066ec1 18#include <iostream>
eacb10ce 19
2b066ec1 20#include <fstream>
2f1a1aea 21#include <cctype>
9c0c0e46 22#include <cstdlib>
29e64872 23#include <cassert>
9c0c0e46
FCE
24#include <cerrno>
25#include <climits>
57b73400 26#include <sstream>
f74fb737 27#include <cstring>
3f99432c 28#include <cctype>
eacb10ce
FCE
29#include <iterator>
30
7a468d68
FCE
31extern "C" {
32#include <fnmatch.h>
33}
2f1a1aea
FCE
34
35using namespace std;
36
c18f07f8
JS
37
38class lexer
39{
40public:
fee28e5c 41 bool ate_comment; // current token follows a comment
b5477cd9 42 bool ate_whitespace; // the most recent token followed whitespace
534aad8b
SM
43 bool saw_tokens; // the lexer found tokens (before preprocessing occurred)
44
b5477cd9 45 token* scan ();
c18f07f8
JS
46 lexer (istream&, const string&, systemtap_session&);
47 void set_current_file (stapfile* f);
101b0805 48 void set_current_token_chain (const token* tok);
c18f07f8 49
2524d1fd
SM
50 static set<string> keywords;
51 static set<string> atwords;
c18f07f8
JS
52private:
53 inline int input_get ();
54 inline int input_peek (unsigned n=0);
55 void input_put (const string&, const token*);
56 string input_name;
57 string input_contents;
58 const char *input_pointer; // index into input_contents
59 const char *input_end;
60 unsigned cursor_suspend_count;
61 unsigned cursor_suspend_line;
62 unsigned cursor_suspend_column;
63 unsigned cursor_line;
64 unsigned cursor_column;
65 systemtap_session& session;
66 stapfile* current_file;
101b0805 67 const token* current_token_chain;
c18f07f8
JS
68};
69
70
71class parser
72{
73public:
4cd32d8c 74 parser (systemtap_session& s, const string& n, istream& i, bool p);
c18f07f8
JS
75 ~parser ();
76
7ac01ea0 77 stapfile* parse (bool errs_as_warnings);
101b0805 78 probe* parse_synthetic_probe (const token* chain, bool errs_as_warnings);
7ac01ea0 79 stapfile* parse_library_macros (bool errs_as_warnings);
c18f07f8
JS
80
81private:
82 typedef enum {
83 PP_NONE,
84 PP_KEEP_THEN,
85 PP_SKIP_THEN,
86 PP_KEEP_ELSE,
87 PP_SKIP_ELSE,
88 } pp_state_t;
89
534aad8b
SM
90 struct pp1_activation;
91
fe410f52
SM
92 struct pp_macrodecl : public macrodecl {
93 pp1_activation* parent_act; // used for param bindings
94 virtual bool is_closure() { return parent_act != 0; }
95 pp_macrodecl () : macrodecl(), parent_act(0) { }
534aad8b
SM
96 };
97
c18f07f8
JS
98 systemtap_session& session;
99 string input_name;
c18f07f8
JS
100 lexer input;
101 bool privileged;
102 parse_context context;
103
534aad8b
SM
104 // preprocessing subordinate, first pass (macros)
105 struct pp1_activation {
106 const token* tok;
107 unsigned cursor; // position within macro body
108 map<string, pp_macrodecl*> params;
534aad8b 109
fe410f52 110 macrodecl* curr_macro;
534aad8b 111
bdf7707b
JS
112 pp1_activation (const token* tok, macrodecl* curr_macro)
113 : tok(tok), cursor(0), curr_macro(curr_macro) { }
534aad8b
SM
114 ~pp1_activation ();
115 };
116
fe410f52 117 map<string, macrodecl*> pp1_namespace;
534aad8b
SM
118 vector<pp1_activation*> pp1_state;
119 const token* next_pp1 ();
120 const token* scan_pp1 ();
121 const token* slurp_pp1_param (vector<const token*>& param);
122 const token* slurp_pp1_body (vector<const token*>& body);
123
124 // preprocessing subordinate, final pass (conditionals)
c18f07f8 125 vector<pair<const token*, pp_state_t> > pp_state;
b5477cd9 126 const token* scan_pp ();
c18f07f8
JS
127 const token* skip_pp ();
128
129 // scanning state
b5477cd9
SM
130 const token* next ();
131 const token* peek ();
c18f07f8 132
731a5359
MW
133 // Advance past and throw away current token after peek () or next ().
134 void swallow ();
135
a07a2c28 136 const token* systemtap_v_seen;
c18f07f8
JS
137 const token* last_t; // the last value returned by peek() or next()
138 const token* next_t; // lookahead token
139
731a5359
MW
140 // expectations, these swallow the token
141 void expect_known (token_type tt, string const & expected);
142 void expect_unknown (token_type tt, string & target);
143 void expect_unknown2 (token_type tt1, token_type tt2, string & target);
144
145 // convenience forms, these also swallow the token
146 void expect_op (string const & expected);
147 void expect_kw (string const & expected);
148 void expect_number (int64_t & expected);
149 void expect_ident_or_keyword (string & target);
150
151 // convenience forms, which return true or false, these don't swallow token
c18f07f8
JS
152 bool peek_op (string const & op);
153 bool peek_kw (string const & kw);
154
731a5359
MW
155 // convenience forms, which return the token
156 const token* expect_kw_token (string const & expected);
157 const token* expect_ident_or_atword (string & target);
158
7ac01ea0 159 void print_error (const parse_error& pe, bool errs_as_warnings = false);
c18f07f8
JS
160 unsigned num_errors;
161
162private: // nonterminals
163 void parse_probe (vector<probe*>&, vector<probe_alias*>&);
164 void parse_global (vector<vardecl*>&, vector<probe*>&);
165 void parse_functiondecl (vector<functiondecl*>&);
166 embeddedcode* parse_embeddedcode ();
167 probe_point* parse_probe_point ();
d24f1ff4
SM
168 literal_string* consume_string_literals (const token*);
169 literal_string* parse_literal_string ();
c18f07f8
JS
170 literal* parse_literal ();
171 block* parse_stmt_block ();
172 try_block* parse_try_block ();
173 statement* parse_statement ();
174 if_statement* parse_if_statement ();
175 for_loop* parse_for_loop ();
176 for_loop* parse_while_loop ();
177 foreach_loop* parse_foreach_loop ();
178 expr_statement* parse_expr_statement ();
179 return_statement* parse_return_statement ();
180 delete_statement* parse_delete_statement ();
181 next_statement* parse_next_statement ();
182 break_statement* parse_break_statement ();
183 continue_statement* parse_continue_statement ();
184 indexable* parse_indexable ();
185 const token *parse_hist_op_or_bare_name (hist_op *&hop, string &name);
186 target_symbol *parse_target_symbol (const token* t);
8cc799a5 187 expression* parse_entry_op (const token* t);
c18f07f8 188 expression* parse_defined_op (const token* t);
3689db05 189 expression* parse_perf_op (const token* t);
c18f07f8
JS
190 expression* parse_expression ();
191 expression* parse_assignment ();
192 expression* parse_ternary ();
193 expression* parse_logical_or ();
194 expression* parse_logical_and ();
195 expression* parse_boolean_or ();
196 expression* parse_boolean_xor ();
197 expression* parse_boolean_and ();
198 expression* parse_array_in ();
93daaca8 199 expression* parse_comparison_or_regex_query ();
c18f07f8
JS
200 expression* parse_shift ();
201 expression* parse_concatenation ();
202 expression* parse_additive ();
203 expression* parse_multiplicative ();
204 expression* parse_unary ();
205 expression* parse_crement ();
206 expression* parse_value ();
207 expression* parse_symbol ();
208
209 void parse_target_symbol_components (target_symbol* e);
210};
211
212
2f1a1aea
FCE
213// ------------------------------------------------------------------------
214
c18f07f8 215stapfile*
7ac01ea0 216parse (systemtap_session& s, istream& i, bool pr, bool errs_as_warnings)
c18f07f8 217{
4cd32d8c 218 parser p (s, "<input>", i, pr);
7ac01ea0 219 return p.parse (errs_as_warnings);
c18f07f8
JS
220}
221
222
223stapfile*
7ac01ea0 224parse (systemtap_session& s, const string& name, bool pr, bool errs_as_warnings)
c18f07f8 225{
4cd32d8c
JS
226 ifstream i(name.c_str(), ios::in);
227 if (i.fail())
228 {
229 cerr << (file_exists(name)
230 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
231 : _F("Input file '%s' is missing.", name.c_str()))
232 << endl;
233 return 0;
234 }
235
236 parser p (s, name, i, pr);
7ac01ea0 237 return p.parse (errs_as_warnings);
c18f07f8
JS
238}
239
fe410f52 240stapfile*
7ac01ea0 241parse_library_macros (systemtap_session& s, const string& name, bool errs_as_warnings)
fe410f52
SM
242{
243 ifstream i(name.c_str(), ios::in);
244 if (i.fail())
245 {
246 cerr << (file_exists(name)
247 ? _F("Input file '%s' can't be opened for reading.", name.c_str())
248 : _F("Input file '%s' is missing.", name.c_str()))
249 << endl;
250 return 0;
251 }
252
253 parser p (s, name, i, false); // TODOXX pr is ...? should path be full??
7ac01ea0 254 return p.parse_library_macros (errs_as_warnings);
fe410f52
SM
255}
256
101b0805
JS
257probe*
258parse_synthetic_probe (systemtap_session &s, std::istream& i, const token* tok)
259{
260 parser p (s, "<synthetic>", i, false);
261 return p.parse_synthetic_probe (tok, false);
262}
263
c18f07f8 264// ------------------------------------------------------------------------
bb2e3076
FCE
265
266
4cd32d8c
JS
267parser::parser (systemtap_session& s, const string &n, istream& i, bool p):
268 session (s), input_name (n), input (i, input_name, s), privileged (p),
a07a2c28 269 context(con_unknown), systemtap_v_seen(0), last_t (0), next_t (0), num_errors (0)
4cd32d8c
JS
270{
271}
2f1a1aea
FCE
272
273parser::~parser()
274{
2f1a1aea
FCE
275}
276
d7f3e0c5
GH
277static string
278tt2str(token_type tt)
279{
280 switch (tt)
281 {
282 case tok_junk: return "junk";
283 case tok_identifier: return "identifier";
284 case tok_operator: return "operator";
285 case tok_string: return "string";
286 case tok_number: return "number";
287 case tok_embedded: return "embedded-code";
6e213f58 288 case tok_keyword: return "keyword";
d7f3e0c5
GH
289 }
290 return "unknown token";
291}
82919855 292
0323ed4d
WC
293ostream&
294operator << (ostream& o, const source_loc& loc)
295{
a704a23b 296 o << loc.file->name << ":"
0323ed4d
WC
297 << loc.line << ":"
298 << loc.column;
299
300 return o;
301}
302
56099f08
FCE
303ostream&
304operator << (ostream& o, const token& t)
305{
d7f3e0c5 306 o << tt2str(t.type);
56099f08 307
6e213f58 308 if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
56099f08 309 {
24cb178f
FCE
310 o << " '";
311 for (unsigned i=0; i<t.content.length(); i++)
312 {
313 char c = t.content[i];
314 o << (isprint (c) ? c : '?');
315 }
316 o << "'";
56099f08 317 }
56099f08 318
dff50e09 319 o << " at "
0323ed4d 320 << t.location;
56099f08
FCE
321
322 return o;
323}
324
325
dff50e09 326void
7ac01ea0 327parser::print_error (const parse_error &pe, bool errs_as_warnings)
2f1a1aea 328{
16fc963f 329 const token *tok = pe.tok ? pe.tok : last_t;
7ac01ea0 330 session.print_error(pe, tok, input_name, errs_as_warnings);
2f1a1aea
FCE
331 num_errors ++;
332}
333
334
2f1a1aea 335
c434ec7e
FCE
336
337template <typename OPERAND>
338bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs)
339{
340 if (op->type == tok_operator && op->content == "<=")
341 { return lhs <= rhs; }
342 else if (op->type == tok_operator && op->content == ">=")
343 { return lhs >= rhs; }
344 else if (op->type == tok_operator && op->content == "<")
345 { return lhs < rhs; }
346 else if (op->type == tok_operator && op->content == ">")
347 { return lhs > rhs; }
348 else if (op->type == tok_operator && op->content == "==")
349 { return lhs == rhs; }
350 else if (op->type == tok_operator && op->content == "!=")
351 { return lhs != rhs; }
352 else
f0454224 353 throw PARSE_ERROR (_("expected comparison operator"), op);
c434ec7e
FCE
354}
355
356
534aad8b
SM
357// Here, we perform on-the-fly preprocessing in two passes.
358
359// First pass - macro declaration and expansion.
360//
361// The basic form of a declaration is @define SIGNATURE %( BODY %)
362// where SIGNATURE is of the form macro_name (a, b, c, ...)
363// and BODY can obtain the parameter contents as @a, @b, @c, ....
364// Note that parameterless macros can also be declared.
365//
3932c705 366// Macro definitions may not be nested.
534aad8b
SM
367// A macro is available textually after it has been defined.
368//
369// The basic form of a macro invocation
370// for a parameterless macro is @macro_name,
371// for a macro with parameters is @macro_name(param_1, param_2, ...).
372//
26718dbe
SM
373// NB: this means that a parameterless macro @foo called as @foo(a, b, c)
374// leaves its 'parameters' alone, rather than consuming them to result
375// in a "too many parameters error". This may be useful in the unusual
376// case of wanting @foo to expand to the name of a function.
534aad8b
SM
377//
378// Invocations of unknown macros are left unexpanded, to allow
379// the continued use of constructs such as @cast, @var, etc.
380
fe410f52 381macrodecl::~macrodecl ()
534aad8b
SM
382{
383 delete tok;
384 for (vector<const token*>::iterator it = body.begin();
385 it != body.end(); it++)
386 delete *it;
387}
388
389parser::pp1_activation::~pp1_activation ()
390{
391 delete tok;
fe410f52 392 if (curr_macro->is_closure()) return; // body is shared with an earlier declaration
534aad8b
SM
393 for (map<string, pp_macrodecl*>::iterator it = params.begin();
394 it != params.end(); it++)
395 delete it->second;
396}
397
398// Grab a token from the current input source (main file or macro body):
399const token*
400parser::next_pp1 ()
401{
402 if (pp1_state.empty())
403 return input.scan ();
404
405 // otherwise, we're inside a macro
406 pp1_activation* act = pp1_state.back();
407 unsigned& cursor = act->cursor;
408 if (cursor < act->curr_macro->body.size())
409 {
410 token* t = new token(*act->curr_macro->body[cursor]);
0002fc51 411 t->chain = new token(*act->tok); // mark chained token
534aad8b
SM
412 cursor++;
413 return t;
414 }
415 else
416 return 0; // reached end of macro body
417}
418
419const token*
420parser::scan_pp1 ()
421{
422 while (true)
423 {
424 const token* t = next_pp1 ();
425 if (t == 0) // EOF or end of macro body
426 {
427 if (pp1_state.empty()) // actual EOF
428 return 0;
429
430 // Exit macro and loop around to look for the next token.
431 pp1_activation* act = pp1_state.back();
432 pp1_state.pop_back(); delete act;
433 continue;
434 }
435
436 // macro definition
437 if (t->type == tok_operator && t->content == "@define")
438 {
439 if (!pp1_state.empty())
f0454224 440 throw PARSE_ERROR (_("'@define' forbidden inside macro body"), t);
534aad8b
SM
441 delete t;
442
443 // handle macro definition
444 // (1) consume macro signature
3932c705 445 t = input.scan();
534aad8b 446 if (! (t && t->type == tok_identifier))
f0454224 447 throw PARSE_ERROR (_("expected identifier"), t);
534aad8b
SM
448 string name = t->content;
449
450 // check for redefinition of existing macro
451 if (pp1_namespace.find(name) != pp1_namespace.end())
78ab2de3 452 {
f0454224 453 parse_error er (ERR_SRC, _F("attempt to redefine macro '@%s' in the same file", name.c_str ()), t);
78ab2de3
SM
454
455 // Also point to pp1_namespace[name]->tok, the site of
456 // the original definition:
f0454224 457 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here",
78ab2de3
SM
458 name.c_str()), pp1_namespace[name]->tok);
459 throw er;
460 }
461
462 // XXX: the above restriction was mostly necessary due to
463 // wanting to leave open the possibility of
464 // statically-scoped semantics in the future.
26718dbe
SM
465
466 // XXX: this cascades into further parse errors as the
467 // parser tries to parse the remaining definition... (e.g.
468 // it can't tell that the macro body isn't a conditional,
469 // that the uses of parameters aren't nonexistent
470 // macros.....)
534aad8b 471 if (name == "define")
f0454224 472 throw PARSE_ERROR (_("attempt to redefine '@define'"), t);
534aad8b
SM
473 if (input.atwords.count("@" + name))
474 session.print_warning (_F("macro redefines built-in operator '@%s'", name.c_str()), t);
475
fe410f52
SM
476 macrodecl* decl = (pp1_namespace[name] = new macrodecl);
477 decl->tok = t;
534aad8b
SM
478
479 // determine if the macro takes parameters
4ac28d7e 480 bool saw_params = false;
3932c705
SM
481 t = input.scan();
482 if (t && t->type == tok_operator && t->content == "(")
4ac28d7e
SM
483 {
484 saw_params = true;
485 do
486 {
487 delete t;
488
489 t = input.scan ();
490 if (! (t && t->type == tok_identifier))
f0454224 491 throw PARSE_ERROR(_("expected identifier"), t);
fe410f52 492 decl->formal_args.push_back(t->content);
4ac28d7e
SM
493 delete t;
494
495 t = input.scan ();
496 if (t && t->type == tok_operator && t->content == ",")
497 {
498 continue;
499 }
500 else if (t && t->type == tok_operator && t->content == ")")
501 {
502 delete t;
503 t = input.scan();
504 break;
505 }
506 else
507 {
f0454224 508 throw PARSE_ERROR (_("expected ',' or ')'"), t);
4ac28d7e
SM
509 }
510 }
511 while (true);
512 }
534aad8b
SM
513
514 // (2) identify & consume macro body
3932c705 515 if (! (t && t->type == tok_operator && t->content == "%("))
4ac28d7e
SM
516 {
517 if (saw_params)
f0454224 518 throw PARSE_ERROR (_("expected '%('"), t);
4ac28d7e 519 else
f0454224 520 throw PARSE_ERROR (_("expected '%(' or '('"), t);
4ac28d7e 521 }
3932c705 522 delete t;
534aad8b 523
3932c705
SM
524 t = slurp_pp1_body (decl->body);
525 if (!t)
f0454224 526 throw PARSE_ERROR (_("incomplete macro definition - missing '%)'"), decl->tok);
3932c705 527 delete t;
534aad8b
SM
528
529 // Now loop around to look for a real token.
530 continue;
531 }
532
533 // (potential) macro invocation
534 if (t->type == tok_operator && t->content[0] == '@')
535 {
536 string name = t->content.substr(1); // strip initial '@'
537
538 // check if name refers to a real parameter or macro
fe410f52 539 macrodecl* decl;
534aad8b
SM
540 pp1_activation* act = pp1_state.empty() ? 0 : pp1_state.back();
541 if (act && act->params.find(name) != act->params.end())
542 decl = act->params[name];
fe410f52
SM
543 else if (!(act && act->curr_macro->context == ctx_library)
544 && pp1_namespace.find(name) != pp1_namespace.end())
534aad8b 545 decl = pp1_namespace[name];
fe410f52
SM
546 else if (session.library_macros.find(name)
547 != session.library_macros.end())
548 decl = session.library_macros[name];
534aad8b
SM
549 else // this is an ordinary @operator
550 return t;
551
bdf7707b
JS
552 // handle macro invocation, taking ownership of t
553 pp1_activation *new_act = new pp1_activation(t, decl);
fe410f52 554 unsigned num_params = decl->formal_args.size();
534aad8b
SM
555
556 // (1a) restore parameter invocation closure
fe410f52 557 if (num_params == 0 && decl->is_closure())
534aad8b
SM
558 {
559 // NB: decl->parent_act is always safe since the
560 // parameter decl (if any) comes from an activation
561 // record which deeper in the stack than new_act.
562
563 // decl is a macro parameter which must be evaluated in
564 // the context of the original point of invocation:
fe410f52 565 new_act->params = ((pp_macrodecl*)decl)->parent_act->params;
534aad8b
SM
566 goto expand;
567 }
568
569 // (1b) consume macro parameters (if any)
570 if (num_params == 0)
571 goto expand;
572
573 // for simplicity, we do not allow macro constructs here
574 // -- if we did, we'd have to recursively call scan_pp1()
575 t = next_pp1 ();
39566df2 576 if (! (t && t->type == tok_operator && t->content == "("))
534aad8b
SM
577 {
578 delete new_act;
f0454224 579 throw PARSE_ERROR (_NF
534aad8b 580 ("expected '(' in invocation of macro '@%s'"
f499dee5 581 " taking %d parameter",
534aad8b 582 "expected '(' in invocation of macro '@%s'"
f499dee5 583 " taking %d parameters",
52c2652f 584 num_params, name.c_str(), num_params), t);
534aad8b
SM
585 }
586
587 // XXX perhaps parse/count the full number of params,
588 // so we can say "expected x, found y params" on error?
589 for (unsigned i = 0; i < num_params; i++)
590 {
591 delete t;
592
593 // create parameter closure
fe410f52 594 string param_name = decl->formal_args[i];
534aad8b 595 pp_macrodecl* p = (new_act->params[param_name]
fe410f52
SM
596 = new pp_macrodecl);
597 p->tok = new token(*new_act->tok);
598 p->parent_act = act;
534aad8b
SM
599 // NB: *new_act->tok points to invocation, act is NULL at top level
600
601 t = slurp_pp1_param (p->body);
602
603 // check correct usage of ',' or ')'
604 if (t == 0) // hit unexpected EOF or end of macro
605 {
606 // XXX could we pop the stack and continue parsing
607 // the invocation, allowing macros to construct new
608 // invocations in piecemeal fashion??
609 const token* orig_t = new token(*new_act->tok);
610 delete new_act;
f0454224 611 throw PARSE_ERROR (_("could not find end of macro invocation"), orig_t);
534aad8b
SM
612 }
613 if (t->type == tok_operator && t->content == ",")
614 {
615 if (i + 1 == num_params)
616 {
617 delete new_act;
f0454224 618 throw PARSE_ERROR (_F("too many parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
534aad8b
SM
619 }
620 }
621 else if (t->type == tok_operator && t->content == ")")
622 {
623 if (i + 1 != num_params)
624 {
625 delete new_act;
f0454224 626 throw PARSE_ERROR (_F("too few parameters for macro '@%s' (expected %d)", name.c_str(), num_params), t);
534aad8b
SM
627 }
628 }
629 else
630 {
631 // XXX this is, incidentally, impossible
632 delete new_act;
f0454224 633 throw PARSE_ERROR(_("expected ',' or ')' after macro parameter"), t);
534aad8b
SM
634 }
635 }
636
637 delete t;
638
639 // (2) set up macro expansion
640 expand:
641 pp1_state.push_back (new_act);
642
643 // Now loop around to look for a real token.
644 continue;
645 }
646
647 // Otherwise, we have an ordinary token.
648 return t;
649 }
650}
651
652// Consume a single macro invocation's parameters, heeding nested ( )
653// brackets and stopping on an unbalanced ')' or an unbracketed ','
654// (and returning the final separator token).
655const token*
656parser::slurp_pp1_param (vector<const token*>& param)
657{
658 const token* t = 0;
659 unsigned nesting = 0;
660 do
661 {
662 t = next_pp1 ();
663
664 if (!t)
665 break;
666 if (t->type == tok_operator && t->content == "(")
667 ++nesting;
668 else if (nesting && t->type == tok_operator && t->content == ")")
669 --nesting;
670 else if (!nesting && t->type == tok_operator
671 && (t->content == ")" || t->content == ","))
672 break;
673 param.push_back(t);
674 }
675 while (true);
676 return t; // report ")" or "," or NULL
677}
678
679
680// Consume a macro declaration's body, heeding nested %( %) brackets.
681const token*
682parser::slurp_pp1_body (vector<const token*>& body)
683{
684 const token* t = 0;
685 unsigned nesting = 0;
686 do
687 {
688 t = next_pp1 ();
689
690 if (!t)
691 break;
692 if (t->type == tok_operator && t->content == "%(")
693 ++nesting;
694 else if (nesting && t->type == tok_operator && t->content == "%)")
695 --nesting;
696 else if (!nesting && t->type == tok_operator && t->content == "%)")
697 break;
698 body.push_back(t);
699 }
700 while (true);
701 return t; // report final "%)" or NULL
702}
703
fe410f52
SM
704// Used for parsing .stpm files.
705stapfile*
7ac01ea0 706parser::parse_library_macros (bool errs_as_warnings)
fe410f52
SM
707{
708 stapfile* f = new stapfile;
709 input.set_current_file (f);
710
711 try
712 {
713 const token* t = scan_pp1 ();
714
715 // Currently we only take objection to macro invocations if they
716 // produce a non-whitespace token after being expanded.
717
718 // XXX should we prevent macro invocations even if they expand to empty??
719
720 if (t != 0)
f0454224 721 throw PARSE_ERROR (_F("library macro file '%s' contains non-@define construct", input_name.c_str()), t);
fe410f52
SM
722
723 // We need to first check whether *any* of the macros are duplicates,
724 // then commit to including the entire file in the global namespace
725 // (or not). Yuck.
726 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
727 it != pp1_namespace.end(); it++)
728 {
729 string name = it->first;
730
731 if (session.library_macros.find(name) != session.library_macros.end())
732 {
f0454224
JL
733 parse_error er(ERR_SRC, _F("duplicate definition of library macro '@%s'", name.c_str()), it->second->tok);
734 er.chain = new PARSE_ERROR (_F("macro '@%s' first defined here", name.c_str()), session.library_macros[name]->tok);
78ab2de3 735 print_error (er);
fe410f52 736
78ab2de3 737 delete er.chain;
fe410f52
SM
738 delete f;
739 return 0;
740 }
741 }
742
743 }
744 catch (const parse_error& pe)
745 {
7ac01ea0 746 print_error (pe, errs_as_warnings);
fe410f52
SM
747 delete f;
748 return 0;
749 }
750
751 // If no errors, include the entire file. Note how this is outside
752 // of the try-catch block -- no errors possible.
753 for (map<string, macrodecl*>::iterator it = pp1_namespace.begin();
754 it != pp1_namespace.end(); it++)
755 {
756 string name = it->first;
757
758 session.library_macros[name] = it->second;
759 session.library_macros[name]->context = ctx_library;
fe410f52
SM
760 }
761
762 return f;
763}
764
534aad8b
SM
765// Second pass - preprocessor conditional expansion.
766//
177a8ead 767// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
44ce8ed5
FCE
768// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
769// or: arch COMPARISON-OP "arch-string"
db135493 770// or: systemtap_v COMPARISON-OP "version-string"
2e6dd9d0 771// or: systemtap_privilege COMPARISON-OP "privilege-string"
561079c8 772// or: CONFIG_foo COMPARISON-OP "config-string"
717a457b 773// or: CONFIG_foo COMPARISON-OP number
4227f98d 774// or: CONFIG_foo COMPARISON-OP CONFIG_bar
5811366a
FCE
775// or: "string1" COMPARISON-OP "string2"
776// or: number1 COMPARISON-OP number2
44ce8ed5 777// The %: ELSE-TOKENS part is optional.
177a8ead
FCE
778//
779// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
c434ec7e 780// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
561079c8 781// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
177a8ead
FCE
782//
783// Up to an entire %( ... %) expression is processed by a single call
784// to this function. Tokens included by any nested conditions are
785// enqueued in a private vector.
786
787bool eval_pp_conditional (systemtap_session& s,
788 const token* l, const token* op, const token* r)
789{
44ce8ed5 790 if (l->type == tok_identifier && (l->content == "kernel_v" ||
db135493
FCE
791 l->content == "kernel_vr" ||
792 l->content == "systemtap_v"))
44ce8ed5 793 {
db135493 794 if (! (r->type == tok_string))
f0454224 795 throw PARSE_ERROR (_("expected string literal"), r);
db135493 796
44ce8ed5 797 string target_kernel_vr = s.kernel_release;
197a4d62 798 string target_kernel_v = s.kernel_base_release;
db135493 799 string target;
dff50e09 800
db135493
FCE
801 if (l->content == "kernel_v") target = target_kernel_v;
802 else if (l->content == "kernel_vr") target = target_kernel_vr;
803 else if (l->content == "systemtap_v") target = s.compatible;
804 else assert (0);
7a468d68 805
7a468d68
FCE
806 string query = r->content;
807 bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
808
44ce8ed5
FCE
809 // collect acceptable strverscmp results.
810 int rvc_ok1, rvc_ok2;
7a468d68 811 bool wc_ok = false;
44ce8ed5
FCE
812 if (op->type == tok_operator && op->content == "<=")
813 { rvc_ok1 = -1; rvc_ok2 = 0; }
814 else if (op->type == tok_operator && op->content == ">=")
815 { rvc_ok1 = 1; rvc_ok2 = 0; }
816 else if (op->type == tok_operator && op->content == "<")
817 { rvc_ok1 = -1; rvc_ok2 = -1; }
818 else if (op->type == tok_operator && op->content == ">")
819 { rvc_ok1 = 1; rvc_ok2 = 1; }
820 else if (op->type == tok_operator && op->content == "==")
7a468d68 821 { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
44ce8ed5 822 else if (op->type == tok_operator && op->content == "!=")
7a468d68 823 { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
44ce8ed5 824 else
f0454224 825 throw PARSE_ERROR (_("expected comparison operator"), op);
7a468d68
FCE
826
827 if ((!wc_ok) && rhs_wildcard)
f0454224 828 throw PARSE_ERROR (_("wildcard not allowed with order comparison operators"), op);
7a468d68
FCE
829
830 if (rhs_wildcard)
831 {
832 int rvc_result = fnmatch (query.c_str(), target.c_str(),
833 FNM_NOESCAPE); // spooky
834 bool badness = (rvc_result == 0) ^ (op->content == "==");
835 return !badness;
836 }
837 else
838 {
839 int rvc_result = strverscmp (target.c_str(), query.c_str());
840 // normalize rvc_result
841 if (rvc_result < 0) rvc_result = -1;
842 if (rvc_result > 0) rvc_result = 1;
843 return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
844 }
44ce8ed5 845 }
2e6dd9d0
SM
846 else if (l->type == tok_identifier && l->content == "systemtap_privilege")
847 {
848 string target_privilege =
cba5b802
SM
849 pr_contains(s.privilege, pr_stapdev) ? "stapdev"
850 : pr_contains(s.privilege, pr_stapsys) ? "stapsys"
851 : pr_contains(s.privilege, pr_stapusr) ? "stapusr"
2e6dd9d0
SM
852 : "none"; /* should be impossible -- s.privilege always one of above */
853 assert(target_privilege != "none");
854
855 if (! (r->type == tok_string))
f0454224 856 throw PARSE_ERROR (_("expected string literal"), r);
2e6dd9d0
SM
857 string query_privilege = r->content;
858
859 bool nomatch = (target_privilege != query_privilege);
860
861 bool result;
862 if (op->type == tok_operator && op->content == "==")
863 result = !nomatch;
864 else if (op->type == tok_operator && op->content == "!=")
865 result = nomatch;
866 else
f0454224 867 throw PARSE_ERROR (_("expected '==' or '!='"), op);
cba5b802 868 /* XXX perhaps allow <= >= and similar comparisons */
2e6dd9d0
SM
869
870 return result;
871 }
92585d32
PK
872 else if (l->type == tok_identifier && l->content == "guru_mode")
873 {
874 if (! (r->type == tok_number))
875 throw PARSE_ERROR (_("expected number"), r);
876 int64_t lhs = (int64_t) s.guru_mode;
877 int64_t rhs = lex_cast<int64_t>(r->content);
878 if (!((rhs == 0)||(rhs == 1)))
879 throw PARSE_ERROR (_("expected 0 or 1"), op);
880 if (!((op->type == tok_operator && op->content == "==") ||
881 (op->type == tok_operator && op->content == "!=")))
882 throw PARSE_ERROR (_("expected '==' or '!='"), op);
883
884 return eval_comparison (lhs, op, rhs);
885 }
44ce8ed5
FCE
886 else if (l->type == tok_identifier && l->content == "arch")
887 {
888 string target_architecture = s.architecture;
889 if (! (r->type == tok_string))
f0454224 890 throw PARSE_ERROR (_("expected string literal"), r);
44ce8ed5 891 string query_architecture = r->content;
dff50e09 892
7a468d68
FCE
893 int nomatch = fnmatch (query_architecture.c_str(),
894 target_architecture.c_str(),
895 FNM_NOESCAPE); // still spooky
896
561079c8
FCE
897 bool result;
898 if (op->type == tok_operator && op->content == "==")
899 result = !nomatch;
900 else if (op->type == tok_operator && op->content == "!=")
901 result = nomatch;
902 else
f0454224 903 throw PARSE_ERROR (_("expected '==' or '!='"), op);
561079c8 904
d9677d7b
DS
905 return result;
906 }
907 else if (l->type == tok_identifier && l->content == "runtime")
908 {
909 if (! (r->type == tok_string))
f0454224 910 throw PARSE_ERROR (_("expected string literal"), r);
d9677d7b
DS
911
912 string query_runtime = r->content;
913 string target_runtime;
914
915 target_runtime = (s.runtime_mode == systemtap_session::dyninst_runtime
916 ? "dyninst" : "kernel");
917 int nomatch = fnmatch (query_runtime.c_str(),
918 target_runtime.c_str(),
919 FNM_NOESCAPE); // still spooky
920
921 bool result;
922 if (op->type == tok_operator && op->content == "==")
923 result = !nomatch;
924 else if (op->type == tok_operator && op->content == "!=")
925 result = nomatch;
926 else
f0454224 927 throw PARSE_ERROR (_("expected '==' or '!='"), op);
d9677d7b 928
561079c8
FCE
929 return result;
930 }
60d98537 931 else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
561079c8 932 {
717a457b
MW
933 if (r->type == tok_string)
934 {
935 string lhs = s.kernel_config[l->content]; // may be empty
936 string rhs = r->content;
561079c8 937
717a457b 938 int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky
561079c8 939
717a457b
MW
940 bool result;
941 if (op->type == tok_operator && op->content == "==")
942 result = !nomatch;
943 else if (op->type == tok_operator && op->content == "!=")
944 result = nomatch;
945 else
f0454224 946 throw PARSE_ERROR (_("expected '==' or '!='"), op);
dff50e09 947
717a457b
MW
948 return result;
949 }
950 else if (r->type == tok_number)
951 {
952 const char* startp = s.kernel_config[l->content].c_str ();
953 char* endp = (char*) startp;
954 errno = 0;
955 int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
956 if (errno == ERANGE || errno == EINVAL || *endp != '\0')
f0454224 957 throw PARSE_ERROR ("Config option value not a number", l);
717a457b
MW
958
959 int64_t rhs = lex_cast<int64_t>(r->content);
960 return eval_comparison (lhs, op, rhs);
961 }
4227f98d 962 else if (r->type == tok_identifier
60d98537 963 && startswith(r->content, "CONFIG_"))
4227f98d
MW
964 {
965 // First try to convert both to numbers,
966 // otherwise treat both as strings.
967 const char* startp = s.kernel_config[l->content].c_str ();
968 char* endp = (char*) startp;
969 errno = 0;
970 int64_t val = (int64_t) strtoll (startp, & endp, 0);
971 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
972 {
973 int64_t lhs = val;
974 startp = s.kernel_config[r->content].c_str ();
975 endp = (char*) startp;
976 errno = 0;
977 int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
978 if (errno != ERANGE && errno != EINVAL && *endp == '\0')
979 return eval_comparison (lhs, op, rhs);
980 }
981
982 string lhs = s.kernel_config[l->content];
983 string rhs = s.kernel_config[r->content];
984 return eval_comparison (lhs, op, rhs);
985 }
717a457b 986 else
f0454224 987 throw PARSE_ERROR (_("expected string, number literal or other CONFIG_... as right side operand"), r);
dff50e09 988 }
c434ec7e 989 else if (l->type == tok_string && r->type == tok_string)
5811366a 990 {
c434ec7e
FCE
991 string lhs = l->content;
992 string rhs = r->content;
993 return eval_comparison (lhs, op, rhs);
994 // NB: no wildcarding option here
995 }
996 else if (l->type == tok_number && r->type == tok_number)
997 {
998 int64_t lhs = lex_cast<int64_t>(l->content);
999 int64_t rhs = lex_cast<int64_t>(r->content);
1000 return eval_comparison (lhs, op, rhs);
7a468d68 1001 // NB: no wildcarding option here
5811366a
FCE
1002 }
1003 else if (l->type == tok_string && r->type == tok_number
1004 && op->type == tok_operator)
f0454224 1005 throw PARSE_ERROR (_("expected string literal as right value"), r);
5811366a
FCE
1006 else if (l->type == tok_number && r->type == tok_string
1007 && op->type == tok_operator)
f0454224 1008 throw PARSE_ERROR (_("expected number literal as right value"), r);
c434ec7e 1009
177a8ead 1010 else
f0454224 1011 throw PARSE_ERROR (_("expected 'arch', 'kernel_v', 'kernel_vr', 'systemtap_v',\n"
d9677d7b
DS
1012 " 'runtime', 'systemtap_privilege', 'CONFIG_...', or\n"
1013 " comparison between strings or integers"), l);
177a8ead
FCE
1014}
1015
1016
5811366a 1017// Only tokens corresponding to the TRUE statement must be expanded
// Return the next token that survives conditional preprocessing, or 0 at
// end of input.  Handles the %( cond %? then-tokens %: else-tokens %)
// construct: the condition is evaluated with eval_pp_conditional(), tokens
// of the branch that is kept are returned to the caller one at a time, and
// the other branch is consumed through skip_pp().
//
// pp_state is used as a stack with one (opening "%(" token, state) entry per
// conditional that is currently open.
//
// Throws a parse error for a misplaced "%?", "%:" or "%)", for an
// incomplete condition, for a missing "%?", and for end of input inside an
// open conditional.
177a8ead 1018const token*
b5477cd9 1019parser::scan_pp ()
177a8ead
FCE
1020{
1021 while (true)
1022 {
e92f2566
JS
// State of the innermost open conditional (PP_NONE when there is none).
1023 pp_state_t pp = PP_NONE;
1024 if (!pp_state.empty())
1025 pp = pp_state.back().second;
1026
// In a skipped branch, let skip_pp() discard tokens up to the next
// relevant preprocessor operator; otherwise read one token normally.
1027 const token* t = 0;
1028 if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE)
1029 t = skip_pp ();
1030 else
534aad8b 1031 t = scan_pp1 ();
e92f2566
JS
1032
1033 if (t == 0) // EOF
177a8ead 1034 {
e92f2566
JS
1035 if (pp != PP_NONE)
1036 {
// Report the error against the "%(" token that opened the conditional.
1037 t = pp_state.back().first;
1038 pp_state.pop_back(); // so skip_some doesn't keep trying to close this
ce0f6648 1039 //TRANSLATORS: 'conditional' meaning 'conditional preprocessing'
f0454224 1040 throw PARSE_ERROR (_("incomplete conditional at end of file"), t);
e92f2566 1041 }
177a8ead
FCE
1042 return t;
1043 }
1044
e92f2566
JS
1045 // misplaced preprocessor "then"
1046 if (t->type == tok_operator && t->content == "%?")
f0454224 1047 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566
JS
1048
1049 // preprocessor "else"
1050 if (t->type == tok_operator && t->content == "%:")
1051 {
1052 if (pp == PP_NONE)
f0454224 1053 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
e92f2566 1054 if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE)
f0454224 1055 throw PARSE_ERROR (_("invalid conditional - duplicate '%:'"), t);
1d94e4e5
SM
1056 // XXX: here and elsewhere, error cascades might be avoided
1057 // by dropping tokens until we reach the closing %)
e92f2566
JS
1058
// Flip the branch: if the "then" part was kept the "else" part is
// skipped, and vice versa.
1059 pp_state.back().second = (pp == PP_KEEP_THEN) ?
1060 PP_SKIP_ELSE : PP_KEEP_ELSE;
1061 delete t;
1062 continue;
1063 }
1064
1065 // preprocessor close
1066 if (t->type == tok_operator && t->content == "%)")
1067 {
1068 if (pp == PP_NONE)
f0454224 1069 throw PARSE_ERROR (_("incomplete conditional - missing '%('"), t);
// The conditional is finished: release its saved "%(" token as well.
e92f2566 1070 delete pp_state.back().first;
a07a2c28 1071 delete t; //this is the closing bracket
e92f2566
JS
1072 pp_state.pop_back();
1073 continue;
1074 }
dff50e09 1075
177a8ead
FCE
1076 if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
1077 return t;
1078
1079 // We have a %( - it's time to throw a preprocessing party!
1080
// The condition is a sequence of "left op right" comparisons joined by
// "&&" and "||".  and_result accumulates the current run of &&-joined
// comparisons; when the run ends it is OR-ed into result.  n holds the
// token read after each comparison (the joiner, or what should be "%?").
2d7881bf
PP
1081 bool result = false;
1082 bool and_result = true;
1083 const token *n = NULL;
1084 do {
1085 const token *l, *op, *r;
534aad8b
SM
1086 l = scan_pp1 ();
1087 op = scan_pp1 ();
1088 r = scan_pp1 ();
2d7881bf 1089 if (l == 0 || op == 0 || r == 0)
f0454224 1090 throw PARSE_ERROR (_("incomplete condition after '%('"), t);
2d7881bf
PP
1091 // NB: consider generalizing to consume all tokens until %?, and
1092 // passing that as a vector to an evaluator.
1093
1094 // Do not evaluate the condition if we haven't expanded everything.
1095 // This may occur when having several recursive conditionals.
1096 and_result &= eval_pp_conditional (session, l, op, r);
a07a2c28
LB
// The right operand of a systemtap_v comparison is kept alive and
// remembered in systemtap_v_seen instead of being deleted here.
1097 if(l->content=="systemtap_v")
1098 systemtap_v_seen=r;
1099
1100 else
1101 delete r;
1102
2d7881bf
PP
1103 delete l;
1104 delete op;
2d7881bf
PP
// Deletes the joiner token from the previous iteration (NULL the first time).
1105 delete n;
1106
534aad8b 1107 n = scan_pp1 ();
2d7881bf
PP
1108 if (n && n->type == tok_operator && n->content == "&&")
1109 continue;
1110 result |= and_result;
1111 and_result = true;
1112 if (! (n && n->type == tok_operator && n->content == "||"))
1113 break;
1114 } while (true);
3f847830
FCE
1115
1116 /*
1117 clog << "PP eval (" << *t << ") == " << result << endl;
1118 */
1119
// Whatever ended the condition must be the "%?" marker.
e92f2566 1120 const token *m = n;
177a8ead 1121 if (! (m && m->type == tok_operator && m->content == "%?"))
f0454224 1122 throw PARSE_ERROR (_("expected '%?' marker for conditional"), t);
70c743d8 1123 delete m; // "%?"
177a8ead 1124
// Open the conditional; its "%(" token t is kept on the stack and is
// deleted when the matching "%)" is seen.
e92f2566
JS
1125 pp = result ? PP_KEEP_THEN : PP_SKIP_THEN;
1126 pp_state.push_back (make_pair (t, pp));
3f847830 1127
e92f2566
JS
1128 // Now loop around to look for a real token.
1129 }
1130}
3f847830 1131
3f847830 1132
e92f2566
JS
1133// Skip over tokens and any errors, heeding
1134// only nested preprocessor starts and ends.
1135const token*
1136parser::skip_pp ()
1137{
1138 const token* t = 0;
1139 unsigned nesting = 0;
1140 do
1141 {
1142 try
1143 {
534aad8b 1144 t = scan_pp1 ();
177a8ead 1145 }
e92f2566 1146 catch (const parse_error &e)
70c743d8 1147 {
e92f2566 1148 continue;
70c743d8 1149 }
e92f2566
JS
1150 if (!t)
1151 break;
1152 if (t->type == tok_operator && t->content == "%(")
1153 ++nesting;
1154 else if (nesting && t->type == tok_operator && t->content == "%)")
1155 --nesting;
1156 else if (!nesting && t->type == tok_operator &&
1157 (t->content == "%:" || t->content == "%?" || t->content == "%)"))
1158 break;
1159 delete t;
177a8ead 1160 }
e92f2566
JS
1161 while (true);
1162 return t;
177a8ead
FCE
1163}
1164
1165
2f1a1aea 1166const token*
b5477cd9 1167parser::next ()
2f1a1aea
FCE
1168{
1169 if (! next_t)
b5477cd9 1170 next_t = scan_pp ();
2f1a1aea 1171 if (! next_t)
f0454224 1172 throw PARSE_ERROR (_("unexpected end-of-file"));
2f1a1aea 1173
2f1a1aea
FCE
1174 last_t = next_t;
1175 // advance by zeroing next_t
1176 next_t = 0;
1177 return last_t;
1178}
1179
1180
1181const token*
b5477cd9 1182parser::peek ()
2f1a1aea
FCE
1183{
1184 if (! next_t)
b5477cd9 1185 next_t = scan_pp ();
2f1a1aea
FCE
1186
1187 // don't advance by zeroing next_t
1188 last_t = next_t;
1189 return next_t;
1190}
1191
1192
731a5359
MW
1193void
1194parser::swallow ()
1195{
1196 // can only swallow something last peeked or nexted token.
1197 assert (last_t != 0);
1198 delete last_t;
1199 // advance by zeroing next_t
1200 last_t = next_t = 0;
1201}
1202
1203
d7f3e0c5
GH
1204static inline bool
1205tok_is(token const * t, token_type tt, string const & expected)
1206{
1207 return t && t->type == tt && t->content == expected;
1208}
1209
1210
731a5359 1211void
d7f3e0c5
GH
1212parser::expect_known (token_type tt, string const & expected)
1213{
1214 const token *t = next();
57b73400 1215 if (! (t && t->type == tt && t->content == expected))
f0454224 1216 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
731a5359 1217 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1218}
1219
1220
731a5359 1221void
d7f3e0c5
GH
1222parser::expect_unknown (token_type tt, string & target)
1223{
1224 const token *t = next();
1225 if (!(t && t->type == tt))
f0454224 1226 throw PARSE_ERROR (_("expected ") + tt2str(tt));
d7f3e0c5 1227 target = t->content;
731a5359 1228 swallow (); // We are done with it, content was copied.
d7f3e0c5
GH
1229}
1230
1231
731a5359 1232void
493ee224
DS
1233parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
1234{
1235 const token *t = next();
1236 if (!(t && (t->type == tt1 || t->type == tt2)))
f0454224 1237 throw PARSE_ERROR (_F("expected %s or %s", tt2str(tt1).c_str(), tt2str(tt2).c_str()));
493ee224 1238 target = t->content;
731a5359 1239 swallow (); // We are done with it, content was copied.
493ee224
DS
1240}
1241
1242
731a5359 1243void
d7f3e0c5
GH
1244parser::expect_op (std::string const & expected)
1245{
731a5359 1246 expect_known (tok_operator, expected);
d7f3e0c5
GH
1247}
1248
1249
731a5359 1250void
d7f3e0c5
GH
1251parser::expect_kw (std::string const & expected)
1252{
731a5359 1253 expect_known (tok_keyword, expected);
d7f3e0c5
GH
1254}
1255
dff50e09 1256const token*
731a5359
MW
1257parser::expect_kw_token (std::string const & expected)
1258{
1259 const token *t = next();
1260 if (! (t && t->type == tok_keyword && t->content == expected))
f0454224 1261 throw PARSE_ERROR (_F("expected '%s'", expected.c_str()));
731a5359
MW
1262 return t;
1263}
1264
1265void
e38723d2 1266parser::expect_number (int64_t & value)
57b73400 1267{
e38723d2
MH
1268 bool neg = false;
1269 const token *t = next();
1270 if (t->type == tok_operator && t->content == "-")
1271 {
1272 neg = true;
731a5359 1273 swallow ();
e38723d2
MH
1274 t = next ();
1275 }
1276 if (!(t && t->type == tok_number))
f0454224 1277 throw PARSE_ERROR (_("expected number"));
e38723d2
MH
1278
1279 const char* startp = t->content.c_str ();
1280 char* endp = (char*) startp;
1281
1282 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1283 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1284 // since the lexer only gives us positive digit strings, but we'll
1285 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1286 errno = 0;
1287 value = (int64_t) strtoull (startp, & endp, 0);
1288 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1289 || (neg && (unsigned long long) value > 9223372036854775808ULL)
1290 || (unsigned long long) value > 18446744073709551615ULL
1291 || value < -9223372036854775807LL-1)
f0454224 1292 throw PARSE_ERROR (_("number invalid or out of range"));
dff50e09 1293
e38723d2
MH
1294 if (neg)
1295 value = -value;
1296
731a5359 1297 swallow (); // We are done with it, content was parsed and copied into value.
57b73400
GH
1298}
1299
d7f3e0c5 1300
dff50e09 1301const token*
50cc7cd5 1302parser::expect_ident_or_atword (std::string & target)
d7f3e0c5 1303{
06219d6f
SM
1304 const token *t = next();
1305
1306 // accept identifiers and operators beginning in '@':
1307 if (!t || (t->type != tok_identifier
1308 && (t->type != tok_operator || t->content[0] != '@')))
1309 // XXX currently this is only called from parse_hist_op_or_bare_name(),
1310 // so the message is accurate, but keep an eye out in the future:
f0454224 1311 throw PARSE_ERROR (_F("expected %s or statistical operation", tt2str(tok_identifier).c_str()));
06219d6f
SM
1312
1313 target = t->content;
1314 return t;
d7f3e0c5
GH
1315}
1316
1317
731a5359 1318void
493ee224
DS
1319parser::expect_ident_or_keyword (std::string & target)
1320{
731a5359 1321 expect_unknown2 (tok_identifier, tok_keyword, target);
493ee224
DS
1322}
1323
1324
dff50e09 1325bool
d7f3e0c5
GH
1326parser::peek_op (std::string const & op)
1327{
1328 return tok_is (peek(), tok_operator, op);
1329}
1330
1331
dff50e09 1332bool
d7f3e0c5
GH
1333parser::peek_kw (std::string const & kw)
1334{
1335 return tok_is (peek(), tok_identifier, kw);
1336}
1337
1338
1339
66c7d4c1 1340lexer::lexer (istream& input, const string& in, systemtap_session& s):
03ba36d9
SM
1341 ate_comment(false), ate_whitespace(false), saw_tokens(false),
1342 input_name (in), input_pointer (0), input_end (0), cursor_suspend_count(0),
1343 cursor_suspend_line (1), cursor_suspend_column (1), cursor_line (1),
101b0805 1344 cursor_column (1), session(s), current_file (0), current_token_chain (0)
eacb10ce 1345{
66c7d4c1 1346 getline(input, input_contents, '\0');
2203b032 1347
66c7d4c1
JS
1348 input_pointer = input_contents.data();
1349 input_end = input_contents.data() + input_contents.size();
1350
1351 if (keywords.empty())
1352 {
3a7ec735
FCE
1353 // NB: adding new keywords is highly disruptive to the language,
1354 // in particular to existing scripts that could be suddenly
1355 // broken. If done at all, it has to be s.compatible-sensitive,
1356 // and broadly advertised.
66c7d4c1
JS
1357 keywords.insert("probe");
1358 keywords.insert("global");
1359 keywords.insert("function");
1360 keywords.insert("if");
1361 keywords.insert("else");
1362 keywords.insert("for");
1363 keywords.insert("foreach");
1364 keywords.insert("in");
1365 keywords.insert("limit");
1366 keywords.insert("return");
1367 keywords.insert("delete");
1368 keywords.insert("while");
1369 keywords.insert("break");
1370 keywords.insert("continue");
1371 keywords.insert("next");
1372 keywords.insert("string");
1373 keywords.insert("long");
f4fe2e93
FCE
1374 keywords.insert("try");
1375 keywords.insert("catch");
66c7d4c1 1376 }
2524d1fd
SM
1377
1378 if (atwords.empty())
1379 {
1380 // NB: adding new @words is mildly disruptive to existing
1381 // scripts that define macros with the same name, but not
1382 // really. The user will merely receive a warning that they are
1383 // redefining an existing operator.
1384 atwords.insert("@cast");
1385 atwords.insert("@defined");
1386 atwords.insert("@entry");
aeeb8b2c 1387 atwords.insert("@perf");
2524d1fd
SM
1388 atwords.insert("@var");
1389 atwords.insert("@avg");
1390 atwords.insert("@count");
1391 atwords.insert("@sum");
1392 atwords.insert("@min");
1393 atwords.insert("@max");
1394 atwords.insert("@hist_linear");
1395 atwords.insert("@hist_log");
1396 }
eacb10ce 1397}
2f1a1aea 1398
// Storage for the lexer's static word tables.  Both start out empty and
// are populated by the lexer constructor when it finds them empty.
66c7d4c1 1399set<string> lexer::keywords;
2524d1fd 1400set<string> lexer::atwords;
66c7d4c1 1401
1b1b4ceb
RA
1402void
1403lexer::set_current_file (stapfile* f)
1404{
1405 current_file = f;
2203b032
JS
1406 if (f)
1407 {
1408 f->file_contents = input_contents;
1409 f->name = input_name;
1410 }
1b1b4ceb 1411}
bb2e3076 1412
101b0805
JS
1413void
1414lexer::set_current_token_chain (const token* tok)
1415{
1416 current_token_chain = tok;
1417}
1418
bb2e3076
FCE
1419int
1420lexer::input_peek (unsigned n)
1421{
66c7d4c1
JS
1422 if (input_pointer + n >= input_end)
1423 return -1; // EOF
1424 return (unsigned char)*(input_pointer + n);
bb2e3076
FCE
1425}
1426
1427
dff50e09 1428int
2f1a1aea
FCE
1429lexer::input_get ()
1430{
66c7d4c1 1431 int c = input_peek();
bb2e3076
FCE
1432 if (c < 0) return c; // EOF
1433
66c7d4c1
JS
1434 ++input_pointer;
1435
3f99432c 1436 if (cursor_suspend_count)
9300f661
JS
1437 {
1438 // Track effect of input_put: preserve previous cursor/line_column
1439 // until all of its characters are consumed.
1440 if (--cursor_suspend_count == 0)
1441 {
1442 cursor_line = cursor_suspend_line;
1443 cursor_column = cursor_suspend_column;
1444 }
1445 }
3f99432c 1446 else
2f1a1aea 1447 {
3f99432c
FCE
1448 // update source cursor
1449 if (c == '\n')
1450 {
1451 cursor_line ++;
1452 cursor_column = 1;
1453 }
1454 else
1455 cursor_column ++;
2f1a1aea 1456 }
2f1a1aea 1457
eacb10ce 1458 // clog << "[" << (char)c << "]";
2f1a1aea
FCE
1459 return c;
1460}
1461
1462
3f99432c 1463void
9300f661 1464lexer::input_put (const string& chars, const token* t)
3f99432c 1465{
66c7d4c1
JS
1466 size_t pos = input_pointer - input_contents.data();
1467 // clog << "[put:" << chars << " @" << pos << "]";
1468 input_contents.insert (pos, chars);
eacb10ce 1469 cursor_suspend_count += chars.size();
9300f661
JS
1470 cursor_suspend_line = cursor_line;
1471 cursor_suspend_column = cursor_column;
1472 cursor_line = t->location.line;
1473 cursor_column = t->location.column;
66c7d4c1
JS
1474 input_pointer = input_contents.data() + pos;
1475 input_end = input_contents.data() + input_contents.size();
3f99432c
FCE
1476}
1477
1478
// Scan and return the next token from the input as a newly allocated
// token object, or 0 at end of input.
//
// Whitespace and comments ("#", "//", "/* */") are skipped; ate_whitespace
// and ate_comment record that this happened before the returned token.
// "$N"/"@N" and "$#"/"@#" are expanded from the session's command line
// arguments by pasting the replacement text back into the input with
// input_put() and rescanning.
//
// Malformed input (unterminated string or embedded-code block, bad argument
// substitution, unclassifiable character) is reported by returning a
// tok_junk token rather than by throwing.
2f1a1aea 1479token*
b5477cd9 1480lexer::scan ()
2f1a1aea 1481{
fee28e5c 1482 ate_comment = false; // reset for each new token
b5477cd9 1483 ate_whitespace = false; // reset for each new token
534aad8b
SM
1484
1485 // XXX be very sure to restore old_saw_tokens if we return without a token:
1486 bool old_saw_tokens = saw_tokens;
1487 saw_tokens = true;
1488
2f1a1aea 1489 token* n = new token;
2203b032 1490 n->location.file = current_file;
101b0805 1491 n->chain = current_token_chain;
2f1a1aea 1492
// Rescan point: skipping whitespace/comments and argument substitution jump
// back here, so the token location is re-recorded for each attempt.
9300f661
JS
1493skip:
// True while text pasted by input_put() is still being consumed.
1494 bool suspended = (cursor_suspend_count > 0);
2f1a1aea
FCE
1495 n->location.line = cursor_line;
1496 n->location.column = cursor_column;
1497
1498 int c = input_get();
3f99432c 1499 // clog << "{" << (char)c << (char)c2 << "}";
2f1a1aea
FCE
1500 if (c < 0)
1501 {
1502 delete n;
534aad8b 1503 saw_tokens = old_saw_tokens;
2f1a1aea
FCE
1504 return 0;
1505 }
1506
1507 if (isspace (c))
b5477cd9
SM
1508 {
1509 ate_whitespace = true;
1510 goto skip;
1511 }
2f1a1aea 1512
// One character of look-ahead (-1 at end of input).
66c7d4c1
JS
1513 int c2 = input_peek ();
1514
3f99432c
FCE
1515 // Paste command line arguments as character streams into
1516 // the beginning of a token. $1..$999 go through as raw
1517 // characters; @1..@999 are quoted/escaped as strings.
1518 // $# and @# expand to the number of arguments, similarly
1519 // raw or quoted.
9300f661 1520 if ((c == '$' || c == '@') && (c2 == '#'))
3f99432c 1521 {
9300f661
JS
1522 n->content.push_back (c);
1523 n->content.push_back (c2);
3f99432c 1524 input_get(); // swallow '#'
// A substitution found inside already-pasted text is rejected.
9300f661 1525 if (suspended)
16fc963f
SM
1526 {
1527 n->make_junk(_("invalid nested substitution of command line arguments"));
1528 return n;
1529 }
9300f661
JS
1530 size_t num_args = session.args.size ();
1531 input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
1532 n->content.clear();
1533 goto skip;
3f99432c 1534 }
9300f661 1535 else if ((c == '$' || c == '@') && (isdigit (c2)))
3f99432c 1536 {
9300f661 1537 n->content.push_back (c);
3f99432c
FCE
// Accumulate the decimal argument index; the loop stops early once the
// index already exceeds the number of arguments.
1538 unsigned idx = 0;
1539 do
1540 {
1541 input_get ();
1542 idx = (idx * 10) + (c2 - '0');
9300f661 1543 n->content.push_back (c2);
3f99432c
FCE
1544 c2 = input_peek ();
1545 } while (c2 > 0 &&
dff50e09 1546 isdigit (c2) &&
3f99432c 1547 idx <= session.args.size()); // prevent overflow
16fc963f
SM
1548 if (suspended)
1549 {
1550 n->make_junk(_("invalid nested substitution of command line arguments"));
1551 return n;
1552 }
3f99432c
FCE
1553 if (idx == 0 ||
1554 idx-1 >= session.args.size())
16fc963f
SM
1555 {
1556 n->make_junk(_F("command line argument index %lu out of range [1-%lu]",
1557 (unsigned long) idx, (unsigned long) session.args.size()));
1558 return n;
1559 }
9300f661
JS
1560 const string& arg = session.args[idx-1];
1561 input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
1562 n->content.clear();
1563 goto skip;
3f99432c
FCE
1564 }
1565
// Identifiers, keywords, and words starting with '$' or '@'.
b5477cd9 1566 else if (isalpha (c) || c == '$' || c == '@' || c == '_')
2f1a1aea
FCE
1567 {
1568 n->type = tok_identifier;
1569 n->content = (char) c;
b5477cd9 1570 while (isalnum (c2) || c2 == '_' || c2 == '$')
2f1a1aea 1571 {
3f99432c
FCE
1572 input_get ();
1573 n->content.push_back (c2);
1574 c2 = input_peek ();
6e213f58 1575 }
213bee8f 1576
// Reclassify: reserved words become tok_keyword, @words become tok_operator.
66c7d4c1 1577 if (keywords.count(n->content))
3f99432c 1578 n->type = tok_keyword;
06219d6f 1579 else if (n->content[0] == '@')
dd90d565 1580 // makes it easier to detect illegal use of @words:
06219d6f 1581 n->type = tok_operator;
dff50e09 1582
2f1a1aea
FCE
1583 return n;
1584 }
1585
3a20432b 1586 else if (isdigit (c)) // positive literal
2f1a1aea 1587 {
2f1a1aea 1588 n->type = tok_number;
9c0c0e46
FCE
1589 n->content = (char) c;
1590
66c7d4c1 1591 while (isalnum (c2))
2f1a1aea 1592 {
9c0c0e46
FCE
1593 // NB: isalnum is very permissive. We rely on strtol, called in
1594 // parser::parse_literal below, to confirm that the number string
1595 // is correctly formatted and in range.
1596
66c7d4c1
JS
1597 input_get ();
1598 n->content.push_back (c2);
1599 c2 = input_peek ();
2f1a1aea
FCE
1600 }
1601 return n;
1602 }
1603
// String literal: the token content is the text between the quotes.
1604 else if (c == '\"')
1605 {
1606 n->type = tok_string;
1607 while (1)
1608 {
1609 c = input_get ();
1610
// A string may not span lines or run into end of input.
3f99432c 1611 if (c < 0 || c == '\n')
2f1a1aea 1612 {
16fc963f
SM
1613 n->make_junk(_("Could not find matching closing quote"));
1614 return n;
2f1a1aea
FCE
1615 }
1616 if (c == '\"') // closing double-quotes
1617 break;
3f99432c 1618 else if (c == '\\') // see also input_put
dff50e09 1619 {
7d46afb8
GH
1620 c = input_get ();
1621 switch (c)
1622 {
// "\x" is only treated as an escape when the session's compatibility
// version is at least 2.3; otherwise it takes the default path below.
ef8a6134
SM
1623 case 'x':
1624 if (strverscmp(session.compatible.c_str(), "2.3") < 0)
1625 goto the_default;
7d46afb8
GH
1626 case 'a':
1627 case 'b':
1628 case 't':
1629 case 'n':
1630 case 'v':
1631 case 'f':
1632 case 'r':
f03954fd 1633 case '0' ... '7': // NB: need only match the first digit
7d46afb8 1634 case '\\':
7d46afb8 1635 // Pass these escapes through to the string value
dff50e09 1636 // being parsed; it will be emitted into a C literal.
c7c8d469
FCE
1637 // XXX: PR13371: perhaps we should evaluate them here
1638 // (and re-quote them during translate.cxx emission).
7d46afb8
GH
1639 n->content.push_back('\\');
1640
3f99432c 1641 // fall through
// Any other escaped character is stored without the backslash.
ef8a6134
SM
1642 default: the_default:
1643 n->content.push_back(c);
1644 break;
7d46afb8 1645 }
2f1a1aea
FCE
1646 }
1647 else
1648 n->content.push_back(c);
1649 }
1650 return n;
1651 }
1652
// Punctuation: comments, embedded code blocks, and operators.
1653 else if (ispunct (c))
1654 {
// Second character of look-ahead, needed for three-character operators.
bb2e3076 1655 int c3 = input_peek (1);
2f1a1aea 1656
3a20432b
FCE
1657 // NB: if we were to recognize negative numeric literals here,
1658 // we'd introduce another grammar ambiguity:
1659 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
1660 // instead of tok_number(1) tok_operator('-') tok_number(1)
1661
66c7d4c1 1662 if (c == '#') // shell comment
2f1a1aea
FCE
1663 {
// Consume characters until the cursor moves to another line or input ends.
1664 unsigned this_line = cursor_line;
bb2e3076
FCE
1665 do { c = input_get (); }
1666 while (c >= 0 && cursor_line == this_line);
fee28e5c 1667 ate_comment = true;
b5477cd9 1668 ate_whitespace = true;
2f1a1aea
FCE
1669 goto skip;
1670 }
66c7d4c1 1671 else if ((c == '/' && c2 == '/')) // C++ comment
63a7c90e
FCE
1672 {
1673 unsigned this_line = cursor_line;
bb2e3076
FCE
1674 do { c = input_get (); }
1675 while (c >= 0 && cursor_line == this_line);
fee28e5c 1676 ate_comment = true;
b5477cd9 1677 ate_whitespace = true;
63a7c90e
FCE
1678 goto skip;
1679 }
1680 else if (c == '/' && c2 == '*') // C comment
1681 {
// Slide a two-character window (c, c2) over the input until it reads "*/".
// If input ends first, the loop just stops and the rescan returns 0.
66c7d4c1
JS
1682 (void) input_get (); // swallow '*' already in c2
1683 c = input_get ();
63a7c90e 1684 c2 = input_get ();
bb2e3076 1685 while (c2 >= 0)
63a7c90e 1686 {
66c7d4c1
JS
1687 if (c == '*' && c2 == '/')
1688 break;
63a7c90e
FCE
1689 c = c2;
1690 c2 = input_get ();
63a7c90e 1691 }
fee28e5c 1692 ate_comment = true;
b5477cd9 1693 ate_whitespace = true;
bb2e3076 1694 goto skip;
63a7c90e 1695 }
54dfabe9
FCE
1696 else if (c == '%' && c2 == '{') // embedded code
1697 {
// The token content is the text between "%{" and "%}", collected with the
// same two-character window technique.
1698 n->type = tok_embedded;
1699 (void) input_get (); // swallow '{' already in c2
66c7d4c1
JS
1700 c = input_get ();
1701 c2 = input_get ();
1702 while (c2 >= 0)
54dfabe9 1703 {
66c7d4c1
JS
1704 if (c == '%' && c2 == '}')
1705 return n;
ebbf9df4
FCE
1706 if (c == '}' && c2 == '%') // possible typo
1707 session.print_warning (_("possible erroneous closing '}%', use '%}'?"), n);
54dfabe9 1708 n->content += c;
66c7d4c1
JS
1709 c = c2;
1710 c2 = input_get ();
54dfabe9 1711 }
72cdb9cd 1712
16fc963f
SM
1713 n->make_junk(_("Could not find matching '%}' to close embedded function block"));
1714 return n;
54dfabe9 1715 }
2f1a1aea 1716
bb2e3076
FCE
1717 // We're committed to recognizing at least the first character
1718 // as an operator.
2f1a1aea 1719 n->type = tok_operator;
66c7d4c1 1720 n->content = c;
2f1a1aea 1721
bb2e3076 1722 // match all valid operators, in decreasing size order
66c7d4c1
JS
1723 if ((c == '<' && c2 == '<' && c3 == '<') ||
1724 (c == '<' && c2 == '<' && c3 == '=') ||
1725 (c == '>' && c2 == '>' && c3 == '='))
82919855 1726 {
66c7d4c1
JS
1727 n->content += c2;
1728 n->content += c3;
bb2e3076
FCE
1729 input_get (); input_get (); // swallow other two characters
1730 }
66c7d4c1
JS
1731 else if ((c == '=' && c2 == '=') ||
1732 (c == '!' && c2 == '=') ||
1733 (c == '<' && c2 == '=') ||
1734 (c == '>' && c2 == '=') ||
93daaca8
SM
1735 (c == '=' && c2 == '~') ||
1736 (c == '!' && c2 == '~') ||
66c7d4c1
JS
1737 (c == '+' && c2 == '=') ||
1738 (c == '-' && c2 == '=') ||
1739 (c == '*' && c2 == '=') ||
1740 (c == '/' && c2 == '=') ||
1741 (c == '%' && c2 == '=') ||
1742 (c == '&' && c2 == '=') ||
1743 (c == '^' && c2 == '=') ||
1744 (c == '|' && c2 == '=') ||
1745 (c == '.' && c2 == '=') ||
1746 (c == '&' && c2 == '&') ||
1747 (c == '|' && c2 == '|') ||
1748 (c == '+' && c2 == '+') ||
1749 (c == '-' && c2 == '-') ||
1750 (c == '-' && c2 == '>') ||
1751 (c == '<' && c2 == '<') ||
1752 (c == '>' && c2 == '>') ||
177a8ead 1753 // preprocessor tokens
66c7d4c1
JS
1754 (c == '%' && c2 == '(') ||
1755 (c == '%' && c2 == '?') ||
1756 (c == '%' && c2 == ':') ||
1757 (c == '%' && c2 == ')'))
bb2e3076 1758 {
66c7d4c1 1759 n->content += c2;
bb2e3076 1760 input_get (); // swallow other character
dff50e09 1761 }
2f1a1aea
FCE
1762
1763 return n;
1764 }
1765
// Anything not classified above becomes a junk token whose content is the
// character's value written as a hexadecimal "\x.." escape.
1766 else
1767 {
1768 n->type = tok_junk;
e3795795
FCE
1769 ostringstream s;
1770 s << "\\x" << hex << setw(2) << setfill('0') << c;
1771 n->content = s.str();
16fc963f 1772 n->msg = ""; // signal parser to emit "expected X, found junk" type error
2f1a1aea
FCE
1773 return n;
1774 }
1775}
1776
16fc963f
SM
1777// ------------------------------------------------------------------------
1778
1779void
1780token::make_junk (const string new_msg)
1781{
1782 type = tok_junk;
1783 msg = new_msg;
1784}
2f1a1aea
FCE
1785
1786// ------------------------------------------------------------------------
1787
1788stapfile*
7ac01ea0 1789parser::parse (bool errs_as_warnings)
2f1a1aea
FCE
1790{
1791 stapfile* f = new stapfile;
1b1b4ceb 1792 input.set_current_file (f);
56099f08
FCE
1793
1794 bool empty = true;
1795
2f1a1aea
FCE
1796 while (1)
1797 {
1798 try
1799 {
a07a2c28 1800 systemtap_v_seen = 0;
2f1a1aea 1801 const token* t = peek ();
534aad8b 1802 if (! t) // nice clean EOF, modulo any preprocessing that occurred
2f1a1aea
FCE
1803 break;
1804
56099f08 1805 empty = false;
6e213f58
DS
1806 if (t->type == tok_keyword && t->content == "probe")
1807 {
1808 context = con_probe;
1809 parse_probe (f->probes, f->aliases);
1810 }
1811 else if (t->type == tok_keyword && t->content == "global")
1812 {
1813 context = con_global;
4b5f3e45 1814 parse_global (f->globals, f->probes);
6e213f58
DS
1815 }
1816 else if (t->type == tok_keyword && t->content == "function")
1817 {
1818 context = con_function;
1819 parse_functiondecl (f->functions);
1820 }
54dfabe9 1821 else if (t->type == tok_embedded)
6e213f58
DS
1822 {
1823 context = con_embedded;
1824 f->embeds.push_back (parse_embeddedcode ());
1825 }
2f1a1aea 1826 else
6e213f58
DS
1827 {
1828 context = con_unknown;
f0454224 1829 throw PARSE_ERROR (_("expected 'probe', 'global', 'function', or '%{'"));
6e213f58 1830 }
2f1a1aea
FCE
1831 }
1832 catch (parse_error& pe)
1833 {
7ac01ea0 1834 print_error (pe, errs_as_warnings);
16fc963f
SM
1835
1836 // XXX: do we want tok_junk to be able to force skip_some behaviour?
cd7116b8 1837 if (pe.skip_some) // for recovery
46954f1d
FCE
1838 // Quietly swallow all tokens until the next keyword we can start parsing from.
1839 while (1)
1840 try
1841 {
cd7116b8
FCE
1842 {
1843 const token* t = peek ();
1844 if (! t)
1845 break;
46954f1d
FCE
1846 if (t->type == tok_keyword && t->content == "probe") break;
1847 else if (t->type == tok_keyword && t->content == "global") break;
1848 else if (t->type == tok_keyword && t->content == "function") break;
1849 else if (t->type == tok_embedded) break;
731a5359 1850 swallow (); // swallow it
cd7116b8 1851 }
46954f1d
FCE
1852 }
1853 catch (parse_error& pe2)
1854 {
1855 // parse error during recovery ... ugh
1856 print_error (pe2);
1857 }
177a8ead 1858 }
2f1a1aea
FCE
1859 }
1860
56099f08
FCE
1861 if (empty)
1862 {
534aad8b
SM
1863 // vary message depending on whether file was *actually* empty:
1864 cerr << (input.saw_tokens
1865 ? _F("Input file '%s' is empty after preprocessing.", input_name.c_str())
4cd32d8c 1866 : _F("Input file '%s' is empty.", input_name.c_str()))
534aad8b 1867 << endl;
56099f08 1868 delete f;
2203b032 1869 f = 0;
56099f08
FCE
1870 }
1871 else if (num_errors > 0)
2f1a1aea 1872 {
52c2652f 1873 cerr << _NF("%d parse error.", "%d parse errors.", num_errors, num_errors) << endl;
2f1a1aea 1874 delete f;
2203b032 1875 f = 0;
2f1a1aea 1876 }
dff50e09 1877
2203b032 1878 input.set_current_file(0);
2f1a1aea
FCE
1879 return f;
1880}
1881
1882
101b0805
JS
1883probe*
1884parser::parse_synthetic_probe (const token* chain, bool errs_as_warnings)
1885{
1886 probe* p = NULL;
1887 stapfile* f = new stapfile;
1888 f->synthetic = true;
1889 input.set_current_file (f);
1890 input.set_current_token_chain (chain);
1891
1892 try
1893 {
1894 context = con_probe;
1895 parse_probe (f->probes, f->aliases);
1896
1897 if (f->probes.size() != 1 || !f->aliases.empty())
1898 throw PARSE_ERROR (_("expected a single synthetic probe"));
1899 p = f->probes[0];
1900 }
1901 catch (parse_error& pe)
1902 {
1903 print_error (pe, errs_as_warnings);
1904 }
1905
1906 // TODO check for unparsed tokens?
1907
1908 input.set_current_file(0);
1909 input.set_current_token_chain(0);
1910 return p;
1911}
1912
1913
20c6c071 1914void
54dfabe9
FCE
1915parser::parse_probe (std::vector<probe *> & probe_ret,
1916 std::vector<probe_alias *> & alias_ret)
2f1a1aea 1917{
82919855 1918 const token* t0 = next ();
6e213f58 1919 if (! (t0->type == tok_keyword && t0->content == "probe"))
f0454224 1920 throw PARSE_ERROR (_("expected 'probe'"));
82919855 1921
20c6c071
GH
1922 vector<probe_point *> aliases;
1923 vector<probe_point *> locations;
1924
1925 bool equals_ok = true;
82919855 1926
97266278
LG
1927 int epilogue_alias = 0;
1928
2f1a1aea
FCE
1929 while (1)
1930 {
b4ceace2 1931 probe_point * pp = parse_probe_point ();
dff50e09 1932
b4ceace2 1933 const token* t = peek ();
dff50e09 1934 if (equals_ok && t
b4ceace2
FCE
1935 && t->type == tok_operator && t->content == "=")
1936 {
1ad820e3 1937 if (pp->optional || pp->sufficient)
f0454224 1938 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
b4ceace2 1939 aliases.push_back(pp);
731a5359 1940 swallow ();
b4ceace2
FCE
1941 continue;
1942 }
dff50e09 1943 else if (equals_ok && t
97266278
LG
1944 && t->type == tok_operator && t->content == "+=")
1945 {
1ad820e3 1946 if (pp->optional || pp->sufficient)
f0454224 1947 throw PARSE_ERROR (_("probe point alias name cannot be optional nor sufficient"), pp->components.front()->tok);
97266278
LG
1948 aliases.push_back(pp);
1949 epilogue_alias = 1;
731a5359 1950 swallow ();
97266278
LG
1951 continue;
1952 }
b4ceace2
FCE
1953 else if (t && t->type == tok_operator && t->content == ",")
1954 {
1955 locations.push_back(pp);
1956 equals_ok = false;
731a5359 1957 swallow ();
b4ceace2
FCE
1958 continue;
1959 }
1960 else if (t && t->type == tok_operator && t->content == "{")
1961 {
1962 locations.push_back(pp);
1963 break;
1964 }
2f1a1aea 1965 else
f0454224 1966 throw PARSE_ERROR (_("expected probe point specifier"));
2f1a1aea 1967 }
20c6c071 1968
20c6c071
GH
1969 if (aliases.empty())
1970 {
54dfabe9
FCE
1971 probe* p = new probe;
1972 p->tok = t0;
1973 p->locations = locations;
1974 p->body = parse_stmt_block ();
37ebca01 1975 p->privileged = privileged;
a07a2c28 1976 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 1977 probe_ret.push_back (p);
20c6c071
GH
1978 }
1979 else
1980 {
54dfabe9 1981 probe_alias* p = new probe_alias (aliases);
97266278
LG
1982 if(epilogue_alias)
1983 p->epilogue_style = true;
1984 else
1985 p->epilogue_style = false;
54dfabe9
FCE
1986 p->tok = t0;
1987 p->locations = locations;
1988 p->body = parse_stmt_block ();
37ebca01 1989 p->privileged = privileged;
a07a2c28 1990 p->systemtap_v_conditional = systemtap_v_seen;
54dfabe9 1991 alias_ret.push_back (p);
20c6c071 1992 }
54dfabe9 1993}
20c6c071 1994
54dfabe9
FCE
1995
1996embeddedcode*
1997parser::parse_embeddedcode ()
1998{
1999 embeddedcode* e = new embeddedcode;
2000 const token* t = next ();
2001 if (t->type != tok_embedded)
f0454224 2002 throw PARSE_ERROR (_("expected '%{'"));
24cb178f
FCE
2003
2004 if (! privileged)
f0454224 2005 throw PARSE_ERROR (_("embedded code in unprivileged script; need stap -g"),
cd7116b8 2006 false /* don't skip tokens for parse resumption */);
54dfabe9
FCE
2007
2008 e->tok = t;
2009 e->code = t->content;
2010 return e;
2f1a1aea
FCE
2011}
2012
2013
2014block*
56099f08 2015parser::parse_stmt_block ()
2f1a1aea
FCE
2016{
2017 block* pb = new block;
2018
56099f08
FCE
2019 const token* t = next ();
2020 if (! (t->type == tok_operator && t->content == "{"))
f0454224 2021 throw PARSE_ERROR (_("expected '{'"));
56099f08
FCE
2022
2023 pb->tok = t;
2b066ec1 2024
2f1a1aea
FCE
2025 while (1)
2026 {
46954f1d
FCE
2027 t = peek ();
2028 if (t && t->type == tok_operator && t->content == "}")
2029 {
731a5359 2030 swallow ();
46954f1d
FCE
2031 break;
2032 }
2033 pb->statements.push_back (parse_statement ());
2f1a1aea
FCE
2034 }
2035
2036 return pb;
2037}
2038
2039
f4fe2e93
FCE
2040try_block*
2041parser::parse_try_block ()
2042{
2043 try_block* pb = new try_block;
2044
731a5359 2045 pb->tok = expect_kw_token ("try");
f4fe2e93
FCE
2046 pb->try_block = parse_stmt_block();
2047 expect_kw ("catch");
2048
2049 const token* t = peek ();
3819d181 2050 if (t != NULL && t->type == tok_operator && t->content == "(")
f4fe2e93 2051 {
731a5359 2052 swallow (); // swallow the '('
f4fe2e93
FCE
2053
2054 t = next();
2055 if (! (t->type == tok_identifier))
f0454224 2056 throw PARSE_ERROR (_("expected identifier"));
f4fe2e93
FCE
2057 symbol* sym = new symbol;
2058 sym->tok = t;
2059 sym->name = t->content;
2060 pb->catch_error_var = sym;
2061
2062 expect_op (")");
2063 }
2064 else
2065 pb->catch_error_var = 0;
2066
2067 pb->catch_block = parse_stmt_block();
2068
2069 return pb;
2070}
2071
2072
2073
2f1a1aea
FCE
2074statement*
2075parser::parse_statement ()
2076{
40b71c47 2077 statement *ret;
2f1a1aea
FCE
2078 const token* t = peek ();
2079 if (t && t->type == tok_operator && t->content == ";")
f946b10f 2080 return new null_statement (next ());
dff50e09 2081 else if (t && t->type == tok_operator && t->content == "{")
40b71c47 2082 return parse_stmt_block (); // Don't squash semicolons.
f4fe2e93
FCE
2083 else if (t && t->type == tok_keyword && t->content == "try")
2084 return parse_try_block (); // Don't squash semicolons.
6e213f58 2085 else if (t && t->type == tok_keyword && t->content == "if")
40b71c47 2086 return parse_if_statement (); // Don't squash semicolons.
6e213f58 2087 else if (t && t->type == tok_keyword && t->content == "for")
40b71c47 2088 return parse_for_loop (); // Don't squash semicolons.
6e213f58 2089 else if (t && t->type == tok_keyword && t->content == "foreach")
40b71c47
MW
2090 return parse_foreach_loop (); // Don't squash semicolons.
2091 else if (t && t->type == tok_keyword && t->content == "while")
2092 return parse_while_loop (); // Don't squash semicolons.
6e213f58 2093 else if (t && t->type == tok_keyword && t->content == "return")
40b71c47 2094 ret = parse_return_statement ();
6e213f58 2095 else if (t && t->type == tok_keyword && t->content == "delete")
40b71c47 2096 ret = parse_delete_statement ();
6e213f58 2097 else if (t && t->type == tok_keyword && t->content == "break")
40b71c47 2098 ret = parse_break_statement ();
6e213f58 2099 else if (t && t->type == tok_keyword && t->content == "continue")
40b71c47 2100 ret = parse_continue_statement ();
6e213f58 2101 else if (t && t->type == tok_keyword && t->content == "next")
40b71c47 2102 ret = parse_next_statement ();
2f1a1aea
FCE
2103 else if (t && (t->type == tok_operator || // expressions are flexible
2104 t->type == tok_identifier ||
2105 t->type == tok_number ||
7d902887
FCE
2106 t->type == tok_string ||
2107 t->type == tok_embedded ))
40b71c47 2108 ret = parse_expr_statement ();
54dfabe9 2109 // XXX: consider generally accepting tok_embedded here too
2f1a1aea 2110 else
f0454224 2111 throw PARSE_ERROR (_("expected statement"));
40b71c47
MW
2112
2113 // Squash "empty" trailing colons after any "non-block-like" statement.
2114 t = peek ();
2115 if (t && t->type == tok_operator && t->content == ";")
2116 {
731a5359 2117 swallow (); // Silently eat trailing ; after statement
40b71c47
MW
2118 }
2119
2120 return ret;
2f1a1aea
FCE
2121}
2122
2123
56099f08 2124void
78f6bba6 2125parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
2f1a1aea 2126{
82919855 2127 const token* t0 = next ();
6e213f58 2128 if (! (t0->type == tok_keyword && t0->content == "global"))
f0454224 2129 throw PARSE_ERROR (_("expected 'global'"));
731a5359 2130 swallow ();
82919855 2131
56099f08
FCE
2132 while (1)
2133 {
2134 const token* t = next ();
2135 if (! (t->type == tok_identifier))
f0454224 2136 throw PARSE_ERROR (_("expected identifier"));
56099f08 2137
2b066ec1
FCE
2138 for (unsigned i=0; i<globals.size(); i++)
2139 if (globals[i]->name == t->content)
f0454224 2140 throw PARSE_ERROR (_("duplicate global name"));
dff50e09 2141
24cb178f
FCE
2142 vardecl* d = new vardecl;
2143 d->name = t->content;
2144 d->tok = t;
a07a2c28 2145 d->systemtap_v_conditional = systemtap_v_seen;
24cb178f 2146 globals.push_back (d);
56099f08 2147
82919855 2148 t = peek ();
ef474d24 2149
74e6cc92
CM
2150 if(t && t->type == tok_operator && t->content == "%") //wrapping
2151 {
2152 d->wrap = true;
731a5359 2153 swallow ();
74e6cc92
CM
2154 t = peek();
2155 }
2156
ef474d24
JS
2157 if (t && t->type == tok_operator && t->content == "[") // array size
2158 {
2159 int64_t size;
731a5359 2160 swallow ();
ef474d24
JS
2161 expect_number(size);
2162 if (size <= 0 || size > 1000000) // arbitrary max
f0454224 2163 throw PARSE_ERROR(_("array size out of range"));
ef474d24
JS
2164 d->maxsize = (int)size;
2165 expect_known(tok_operator, "]");
2166 t = peek ();
2167 }
2168
4b5f3e45 2169 if (t && t->type == tok_operator && t->content == "=") // initialization
ef474d24
JS
2170 {
2171 if (!d->compatible_arity(0))
f0454224 2172 throw PARSE_ERROR(_("only scalar globals can be initialized"));
58701b78 2173 d->set_arity(0, t);
731a5359 2174 next (); // Don't swallow, set_arity() used the peeked token.
ef474d24
JS
2175 d->init = parse_literal ();
2176 d->type = d->init->type;
2177 t = peek ();
2178 }
4b5f3e45 2179
c3799d72 2180 if (t && t->type == tok_operator && t->content == ";") // termination
950da622 2181 {
731a5359 2182 swallow ();
950da622
MW
2183 break;
2184 }
c3799d72 2185
4b5f3e45 2186 if (t && t->type == tok_operator && t->content == ",") // next global
82919855 2187 {
731a5359 2188 swallow ();
82919855
FCE
2189 continue;
2190 }
56099f08 2191 else
82919855 2192 break;
56099f08
FCE
2193 }
2194}
2195
2196
24cb178f
FCE
2197void
2198parser::parse_functiondecl (std::vector<functiondecl*>& functions)
56099f08 2199{
82919855 2200 const token* t = next ();
6e213f58 2201 if (! (t->type == tok_keyword && t->content == "function"))
f0454224 2202 throw PARSE_ERROR (_("expected 'function'"));
731a5359 2203 swallow ();
56099f08 2204
82919855 2205 t = next ();
6e213f58
DS
2206 if (! (t->type == tok_identifier)
2207 && ! (t->type == tok_keyword
2208 && (t->content == "string" || t->content == "long")))
f0454224 2209 throw PARSE_ERROR (_("expected identifier"));
24cb178f
FCE
2210
2211 for (unsigned i=0; i<functions.size(); i++)
2212 if (functions[i]->name == t->content)
f0454224 2213 throw PARSE_ERROR (_("duplicate function name"));
24cb178f
FCE
2214
2215 functiondecl *fd = new functiondecl ();
56099f08
FCE
2216 fd->name = t->content;
2217 fd->tok = t;
2218
2219 t = next ();
6a505121
FCE
2220 if (t->type == tok_operator && t->content == ":")
2221 {
731a5359 2222 swallow ();
6a505121 2223 t = next ();
6e213f58 2224 if (t->type == tok_keyword && t->content == "string")
6a505121 2225 fd->type = pe_string;
6e213f58 2226 else if (t->type == tok_keyword && t->content == "long")
6a505121 2227 fd->type = pe_long;
f0454224 2228 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
731a5359 2229 swallow ();
6a505121
FCE
2230
2231 t = next ();
2232 }
2233
56099f08 2234 if (! (t->type == tok_operator && t->content == "("))
f0454224 2235 throw PARSE_ERROR (_("expected '('"));
731a5359 2236 swallow ();
56099f08
FCE
2237
2238 while (1)
2239 {
2240 t = next ();
2241
100a540e 2242 // permit zero-argument functions
56099f08 2243 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2244 {
2245 swallow ();
2246 break;
2247 }
56099f08 2248 else if (! (t->type == tok_identifier))
f0454224 2249 throw PARSE_ERROR (_("expected identifier"));
56099f08
FCE
2250 vardecl* vd = new vardecl;
2251 vd->name = t->content;
2252 vd->tok = t;
2253 fd->formal_args.push_back (vd);
a07a2c28 2254 fd->systemtap_v_conditional = systemtap_v_seen;
56099f08
FCE
2255
2256 t = next ();
6a505121
FCE
2257 if (t->type == tok_operator && t->content == ":")
2258 {
731a5359 2259 swallow ();
6a505121 2260 t = next ();
6e213f58 2261 if (t->type == tok_keyword && t->content == "string")
6a505121 2262 vd->type = pe_string;
6e213f58 2263 else if (t->type == tok_keyword && t->content == "long")
6a505121 2264 vd->type = pe_long;
f0454224 2265 else throw PARSE_ERROR (_("expected 'string' or 'long'"));
731a5359 2266 swallow ();
6a505121
FCE
2267 t = next ();
2268 }
56099f08 2269 if (t->type == tok_operator && t->content == ")")
731a5359
MW
2270 {
2271 swallow ();
2272 break;
2273 }
56099f08 2274 if (t->type == tok_operator && t->content == ",")
731a5359
MW
2275 {
2276 swallow ();
2277 continue;
2278 }
56099f08 2279 else
f0454224 2280 throw PARSE_ERROR (_("expected ',' or ')'"));
56099f08
FCE
2281 }
2282
54dfabe9
FCE
2283 t = peek ();
2284 if (t && t->type == tok_embedded)
2285 fd->body = parse_embeddedcode ();
2286 else
2287 fd->body = parse_stmt_block ();
24cb178f
FCE
2288
2289 functions.push_back (fd);
2f1a1aea
FCE
2290}
2291
2292
9c0c0e46
FCE
2293probe_point*
2294parser::parse_probe_point ()
2f1a1aea 2295{
9c0c0e46 2296 probe_point* pl = new probe_point;
2f1a1aea 2297
9c0c0e46 2298 while (1)
2f1a1aea 2299 {
b5477cd9 2300 const token* t = next ();
6e213f58
DS
2301 if (! (t->type == tok_identifier
2302 // we must allow ".return" and ".function", which are keywords
b5477cd9
SM
2303 || t->type == tok_keyword
2304 // we must allow "*", due to being an operator
2305 || (t->type == tok_operator && t->content == "*")))
f0454224 2306 throw PARSE_ERROR (_("expected identifier or '*'"));
9c0c0e46 2307
b5477cd9
SM
2308 // loop which reconstitutes an identifier with wildcards
2309 string content = t->content;
2310 while (1)
2311 {
2312 const token* u = peek();
3819d181
MW
2313 if (u == NULL)
2314 break;
b5477cd9
SM
2315 // ensure pieces of the identifier are adjacent:
2316 if (input.ate_whitespace)
2317 break;
2318 // ensure pieces of the identifier are valid:
2319 if (! (u->type == tok_identifier
2320 // we must allow arbitrary keywords with a wildcard
2321 || u->type == tok_keyword
2322 // we must allow "*", due to being an operator
2323 || (u->type == tok_operator && u->content == "*")))
2324 break;
2325
2326 // append u to t
2327 content = content + u->content;
2328
2329 // consume u
731a5359 2330 swallow ();
b5477cd9 2331 }
534aad8b
SM
2332 // get around const-ness of t:
2333 token* new_t = new token(*t);
b5477cd9
SM
2334 new_t->content = content;
2335 delete t; t = new_t;
9c0c0e46
FCE
2336
2337 probe_point::component* c = new probe_point::component;
2338 c->functor = t->content;
f1a0157a 2339 c->tok = t;
9c0c0e46 2340 pl->components.push_back (c);
6e3347a9 2341 // NB we may add c->arg soon
9c0c0e46
FCE
2342
2343 t = peek ();
a477f3f1 2344
6e3347a9 2345 // consume optional parameter
9c0c0e46
FCE
2346 if (t && t->type == tok_operator && t->content == "(")
2347 {
731a5359 2348 swallow (); // consume "("
9c0c0e46
FCE
2349 c->arg = parse_literal ();
2350
2351 t = next ();
2352 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2353 throw PARSE_ERROR (_("expected ')'"));
731a5359 2354 swallow ();
9c0c0e46
FCE
2355
2356 t = peek ();
9c0c0e46 2357 }
9c0c0e46
FCE
2358
2359 if (t && t->type == tok_operator && t->content == ".")
6e3347a9 2360 {
731a5359 2361 swallow ();
6e3347a9
FCE
2362 continue;
2363 }
2364
f1a0157a 2365 // We only fall through here at the end of a probe point (past
6e3347a9
FCE
2366 // all the dotted/parametrized components).
2367
d898100a
FCE
2368 if (t && t->type == tok_operator &&
2369 (t->content == "?" || t->content == "!"))
6e3347a9
FCE
2370 {
2371 pl->optional = true;
d898100a
FCE
2372 if (t->content == "!") pl->sufficient = true;
2373 // NB: sufficient implies optional
731a5359 2374 swallow ();
6e3347a9
FCE
2375 t = peek ();
2376 // fall through
cbbe8080
MH
2377 }
2378
2379 if (t && t->type == tok_keyword && t->content == "if")
2380 {
731a5359 2381 swallow ();
cbbe8080 2382 t = peek ();
00917a8a 2383 if (!(t && t->type == tok_operator && t->content == "("))
f0454224 2384 throw PARSE_ERROR (_("expected '('"));
731a5359 2385 swallow ();
cbbe8080
MH
2386
2387 pl->condition = parse_expression ();
2388
2389 t = peek ();
00917a8a 2390 if (!(t && t->type == tok_operator && t->content == ")"))
f0454224 2391 throw PARSE_ERROR (_("expected ')'"));
731a5359 2392 swallow ();
cbbe8080
MH
2393 t = peek ();
2394 // fall through
6e3347a9
FCE
2395 }
2396
dff50e09 2397 if (t && t->type == tok_operator
6e3347a9
FCE
2398 && (t->content == "{" || t->content == "," ||
2399 t->content == "=" || t->content == "+=" ))
2400 break;
dff50e09 2401
f0454224 2402 throw PARSE_ERROR (_("expected one of '. , ( ? ! { = +='"));
2f1a1aea
FCE
2403 }
2404
2405 return pl;
2406}
2407
2408
d24f1ff4
SM
2409literal_string*
2410parser::consume_string_literals(const token *t)
2411{
2412 literal_string *ls = new literal_string (t->content);
2413
2414 // PR11208: check if the next token is also a string literal;
2415 // auto-concatenate it. This is complicated to the extent that we
2416 // need to skip intermediate whitespace.
2417 //
2418 // NB for versions prior to 2.0: but don't skip over intervening comments
2419 const token *n = peek();
2420 while (n != NULL && n->type == tok_string
2421 && ! (strverscmp(session.compatible.c_str(), "2.0") < 0
2422 && input.ate_comment))
2423 {
2424 ls->value.append(next()->content); // consume and append the token
2425 n = peek();
2426 }
2427 return ls;
2428}
2429
2430
2431// Parse a string literal and perform backslash escaping on the contents:
2432literal_string*
2433parser::parse_literal_string ()
2434{
2435 const token* t = next ();
2436 literal_string* l;
2437 if (t->type == tok_string)
2438 l = consume_string_literals (t);
2439 else
f0454224 2440 throw PARSE_ERROR (_("expected literal string"));
d24f1ff4
SM
2441
2442 l->tok = t;
2443 return l;
2444}
2445
2446
2f1a1aea
FCE
2447literal*
2448parser::parse_literal ()
2449{
2450 const token* t = next ();
56099f08 2451 literal* l;
2f1a1aea 2452 if (t->type == tok_string)
c5be7511 2453 {
d24f1ff4 2454 l = consume_string_literals (t);
c5be7511 2455 }
16e8f21f 2456 else
9c0c0e46 2457 {
16e8f21f
JS
2458 bool neg = false;
2459 if (t->type == tok_operator && t->content == "-")
2460 {
2461 neg = true;
731a5359 2462 swallow ();
16e8f21f
JS
2463 t = next ();
2464 }
2465
2466 if (t->type == tok_number)
2467 {
2468 const char* startp = t->content.c_str ();
2469 char* endp = (char*) startp;
2470
2471 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
2472 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
79e6d33f
JS
2473 // since the lexer only gives us positive digit strings, but we'll
2474 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
16e8f21f
JS
2475 errno = 0;
2476 long long value = (long long) strtoull (startp, & endp, 0);
16e8f21f 2477 if (errno == ERANGE || errno == EINVAL || *endp != '\0'
79e6d33f 2478 || (neg && (unsigned long long) value > 9223372036854775808ULL)
16e8f21f
JS
2479 || (unsigned long long) value > 18446744073709551615ULL
2480 || value < -9223372036854775807LL-1)
f0454224 2481 throw PARSE_ERROR (_("number invalid or out of range"));
16e8f21f 2482
79e6d33f
JS
2483 if (neg)
2484 value = -value;
2485
16e8f21f
JS
2486 l = new literal_number (value);
2487 }
2488 else
f0454224 2489 throw PARSE_ERROR (_("expected literal string or number"));
9c0c0e46 2490 }
56099f08
FCE
2491
2492 l->tok = t;
2493 return l;
2f1a1aea
FCE
2494}
2495
2496
2497if_statement*
2498parser::parse_if_statement ()
2499{
2500 const token* t = next ();
6e213f58 2501 if (! (t->type == tok_keyword && t->content == "if"))
f0454224 2502 throw PARSE_ERROR (_("expected 'if'"));
56099f08
FCE
2503 if_statement* s = new if_statement;
2504 s->tok = t;
2505
2506 t = next ();
2f1a1aea 2507 if (! (t->type == tok_operator && t->content == "("))
f0454224 2508 throw PARSE_ERROR (_("expected '('"));
731a5359 2509 swallow ();
2f1a1aea 2510
2f1a1aea
FCE
2511 s->condition = parse_expression ();
2512
2513 t = next ();
2514 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2515 throw PARSE_ERROR (_("expected ')'"));
731a5359 2516 swallow ();
2f1a1aea
FCE
2517
2518 s->thenblock = parse_statement ();
2519
2520 t = peek ();
6e213f58 2521 if (t && t->type == tok_keyword && t->content == "else")
2f1a1aea 2522 {
731a5359 2523 swallow ();
2f1a1aea
FCE
2524 s->elseblock = parse_statement ();
2525 }
ed10c639
FCE
2526 else
2527 s->elseblock = 0; // in case not otherwise initialized
2f1a1aea
FCE
2528
2529 return s;
2530}
2531
2532
69c68955
FCE
2533expr_statement*
2534parser::parse_expr_statement ()
2535{
2536 expr_statement *es = new expr_statement;
2537 const token* t = peek ();
5e58d11c 2538 if (t == NULL)
f0454224 2539 throw PARSE_ERROR (_("expression statement expected"));
731a5359
MW
2540 // Copy, we only peeked, parse_expression might swallow.
2541 es->tok = new token (*t);
69c68955
FCE
2542 es->value = parse_expression ();
2543 return es;
2544}
2545
2546
56099f08
FCE
2547return_statement*
2548parser::parse_return_statement ()
2549{
2550 const token* t = next ();
6e213f58 2551 if (! (t->type == tok_keyword && t->content == "return"))
f0454224 2552 throw PARSE_ERROR (_("expected 'return'"));
6e213f58 2553 if (context != con_function)
f0454224 2554 throw PARSE_ERROR (_("found 'return' not in function context"));
56099f08
FCE
2555 return_statement* s = new return_statement;
2556 s->tok = t;
2557 s->value = parse_expression ();
2558 return s;
2559}
2560
2561
2562delete_statement*
2563parser::parse_delete_statement ()
2564{
2565 const token* t = next ();
6e213f58 2566 if (! (t->type == tok_keyword && t->content == "delete"))
f0454224 2567 throw PARSE_ERROR (_("expected 'delete'"));
56099f08
FCE
2568 delete_statement* s = new delete_statement;
2569 s->tok = t;
2570 s->value = parse_expression ();
2571 return s;
2572}
2573
2574
f3c26ea5
FCE
2575next_statement*
2576parser::parse_next_statement ()
2577{
2578 const token* t = next ();
6e213f58 2579 if (! (t->type == tok_keyword && t->content == "next"))
f0454224 2580 throw PARSE_ERROR (_("expected 'next'"));
6e213f58 2581 if (context != con_probe)
f0454224 2582 throw PARSE_ERROR (_("found 'next' not in probe context"));
f3c26ea5
FCE
2583 next_statement* s = new next_statement;
2584 s->tok = t;
2585 return s;
2586}
2587
2588
2589break_statement*
2590parser::parse_break_statement ()
2591{
2592 const token* t = next ();
6e213f58 2593 if (! (t->type == tok_keyword && t->content == "break"))
f0454224 2594 throw PARSE_ERROR (_("expected 'break'"));
f3c26ea5
FCE
2595 break_statement* s = new break_statement;
2596 s->tok = t;
2597 return s;
2598}
2599
2600
2601continue_statement*
2602parser::parse_continue_statement ()
2603{
2604 const token* t = next ();
6e213f58 2605 if (! (t->type == tok_keyword && t->content == "continue"))
f0454224 2606 throw PARSE_ERROR (_("expected 'continue'"));
f3c26ea5
FCE
2607 continue_statement* s = new continue_statement;
2608 s->tok = t;
2609 return s;
2610}
2611
2612
69c68955
FCE
2613for_loop*
2614parser::parse_for_loop ()
2615{
f3c26ea5 2616 const token* t = next ();
6e213f58 2617 if (! (t->type == tok_keyword && t->content == "for"))
f0454224 2618 throw PARSE_ERROR (_("expected 'for'"));
f3c26ea5
FCE
2619 for_loop* s = new for_loop;
2620 s->tok = t;
2621
2622 t = next ();
2623 if (! (t->type == tok_operator && t->content == "("))
f0454224 2624 throw PARSE_ERROR (_("expected '('"));
731a5359 2625 swallow ();
f3c26ea5
FCE
2626
2627 // initializer + ";"
2628 t = peek ();
2629 if (t && t->type == tok_operator && t->content == ";")
2630 {
cbfbbf69 2631 s->init = 0;
731a5359 2632 swallow ();
f3c26ea5
FCE
2633 }
2634 else
2635 {
2636 s->init = parse_expr_statement ();
2637 t = next ();
2638 if (! (t->type == tok_operator && t->content == ";"))
f0454224 2639 throw PARSE_ERROR (_("expected ';'"));
731a5359 2640 swallow ();
f3c26ea5
FCE
2641 }
2642
2643 // condition + ";"
2644 t = peek ();
2645 if (t && t->type == tok_operator && t->content == ";")
2646 {
2647 literal_number* l = new literal_number(1);
2648 s->cond = l;
2649 s->cond->tok = next ();
2650 }
2651 else
2652 {
2653 s->cond = parse_expression ();
2654 t = next ();
2655 if (! (t->type == tok_operator && t->content == ";"))
f0454224 2656 throw PARSE_ERROR (_("expected ';'"));
731a5359 2657 swallow ();
f3c26ea5 2658 }
dff50e09 2659
f3c26ea5
FCE
2660 // increment + ")"
2661 t = peek ();
2662 if (t && t->type == tok_operator && t->content == ")")
2663 {
cbfbbf69 2664 s->incr = 0;
731a5359 2665 swallow ();
f3c26ea5
FCE
2666 }
2667 else
2668 {
2669 s->incr = parse_expr_statement ();
2670 t = next ();
2671 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2672 throw PARSE_ERROR (_("expected ')'"));
731a5359 2673 swallow ();
f3c26ea5
FCE
2674 }
2675
2676 // block
2677 s->block = parse_statement ();
2678
2679 return s;
2680}
2681
2682
2683for_loop*
2684parser::parse_while_loop ()
2685{
2686 const token* t = next ();
6e213f58 2687 if (! (t->type == tok_keyword && t->content == "while"))
f0454224 2688 throw PARSE_ERROR (_("expected 'while'"));
f3c26ea5
FCE
2689 for_loop* s = new for_loop;
2690 s->tok = t;
2691
2692 t = next ();
2693 if (! (t->type == tok_operator && t->content == "("))
f0454224 2694 throw PARSE_ERROR (_("expected '('"));
731a5359 2695 swallow ();
f3c26ea5
FCE
2696
2697 // dummy init and incr fields
cbfbbf69
FCE
2698 s->init = 0;
2699 s->incr = 0;
f3c26ea5
FCE
2700
2701 // condition
2702 s->cond = parse_expression ();
2703
f3c26ea5
FCE
2704 t = next ();
2705 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2706 throw PARSE_ERROR (_("expected ')'"));
731a5359 2707 swallow ();
dff50e09 2708
f3c26ea5
FCE
2709 // block
2710 s->block = parse_statement ();
2711
2712 return s;
69c68955
FCE
2713}
2714
2715
2716foreach_loop*
2717parser::parse_foreach_loop ()
2718{
2719 const token* t = next ();
6e213f58 2720 if (! (t->type == tok_keyword && t->content == "foreach"))
f0454224 2721 throw PARSE_ERROR (_("expected 'foreach'"));
69c68955
FCE
2722 foreach_loop* s = new foreach_loop;
2723 s->tok = t;
93484556 2724 s->sort_direction = 0;
fd5689dc 2725 s->sort_aggr = sc_none;
c261711d 2726 s->value = NULL;
27f21e8c 2727 s->limit = NULL;
69c68955
FCE
2728
2729 t = next ();
2730 if (! (t->type == tok_operator && t->content == "("))
f0454224 2731 throw PARSE_ERROR (_("expected '('"));
731a5359 2732 swallow ();
69c68955 2733
c261711d
JS
2734 symbol* lookahead_sym = NULL;
2735 int lookahead_sort = 0;
2736
2737 t = peek ();
2738 if (t && t->type == tok_identifier)
2739 {
2740 next ();
2741 lookahead_sym = new symbol;
2742 lookahead_sym->tok = t;
2743 lookahead_sym->name = t->content;
2744
2745 t = peek ();
2746 if (t && t->type == tok_operator &&
2747 (t->content == "+" || t->content == "-"))
2748 {
c261711d 2749 lookahead_sort = (t->content == "+") ? 1 : -1;
731a5359 2750 swallow ();
c261711d
JS
2751 }
2752
2753 t = peek ();
2754 if (t && t->type == tok_operator && t->content == "=")
2755 {
731a5359 2756 swallow ();
c261711d
JS
2757 s->value = lookahead_sym;
2758 if (lookahead_sort)
2759 {
2760 s->sort_direction = lookahead_sort;
2761 s->sort_column = 0;
2762 }
2763 lookahead_sym = NULL;
2764 }
2765 }
2766
69c68955
FCE
2767 // see also parse_array_in
2768
2769 bool parenthesized = false;
2770 t = peek ();
c261711d 2771 if (!lookahead_sym && t && t->type == tok_operator && t->content == "[")
69c68955 2772 {
731a5359 2773 swallow ();
69c68955
FCE
2774 parenthesized = true;
2775 }
2776
c261711d
JS
2777 if (lookahead_sym)
2778 {
2779 s->indexes.push_back (lookahead_sym);
2780 if (lookahead_sort)
2781 {
2782 s->sort_direction = lookahead_sort;
2783 s->sort_column = 1;
2784 }
2785 lookahead_sym = NULL;
2786 }
2787 else while (1)
69c68955
FCE
2788 {
2789 t = next ();
2790 if (! (t->type == tok_identifier))
f0454224 2791 throw PARSE_ERROR (_("expected identifier"));
69c68955
FCE
2792 symbol* sym = new symbol;
2793 sym->tok = t;
2794 sym->name = t->content;
2795 s->indexes.push_back (sym);
2796
93484556
FCE
2797 t = peek ();
2798 if (t && t->type == tok_operator &&
2799 (t->content == "+" || t->content == "-"))
2800 {
2801 if (s->sort_direction)
f0454224 2802 throw PARSE_ERROR (_("multiple sort directives"));
93484556
FCE
2803 s->sort_direction = (t->content == "+") ? 1 : -1;
2804 s->sort_column = s->indexes.size();
731a5359 2805 swallow ();
93484556
FCE
2806 }
2807
69c68955
FCE
2808 if (parenthesized)
2809 {
93484556 2810 t = peek ();
69c68955
FCE
2811 if (t && t->type == tok_operator && t->content == ",")
2812 {
731a5359 2813 swallow ();
69c68955
FCE
2814 continue;
2815 }
2816 else if (t && t->type == tok_operator && t->content == "]")
2817 {
731a5359 2818 swallow ();
69c68955
FCE
2819 break;
2820 }
dff50e09 2821 else
f0454224 2822 throw PARSE_ERROR (_("expected ',' or ']'"));
69c68955
FCE
2823 }
2824 else
2825 break; // expecting only one expression
2826 }
2827
2828 t = next ();
6e213f58 2829 if (! (t->type == tok_keyword && t->content == "in"))
f0454224 2830 throw PARSE_ERROR (_("expected 'in'"));
731a5359 2831 swallow ();
dff50e09 2832
d02548c0 2833 s->base = parse_indexable();
69c68955 2834
fd5689dc
FCE
2835 // check for atword, see also expect_ident_or_atword,
2836 t = peek ();
2837 if (t && t->type == tok_operator && t->content[0] == '@')
2838 {
2839 if (t->content == "@avg") s->sort_aggr = sc_average;
2840 else if (t->content == "@min") s->sort_aggr = sc_min;
2841 else if (t->content == "@max") s->sort_aggr = sc_max;
2842 else if (t->content == "@count") s->sort_aggr = sc_count;
2843 else if (t->content == "@sum") s->sort_aggr = sc_sum;
f0454224 2844 else throw PARSE_ERROR(_("expected statistical operation"));
fd5689dc
FCE
2845 swallow();
2846
2847 t = peek ();
2848 if (! (t && t->type == tok_operator && (t->content == "+" || t->content == "-")))
f0454224 2849 throw PARSE_ERROR(_("expected sort directive"));
fd5689dc
FCE
2850 }
2851
93484556
FCE
2852 t = peek ();
2853 if (t && t->type == tok_operator &&
2854 (t->content == "+" || t->content == "-"))
2855 {
2856 if (s->sort_direction)
f0454224 2857 throw PARSE_ERROR (_("multiple sort directives"));
93484556
FCE
2858 s->sort_direction = (t->content == "+") ? 1 : -1;
2859 s->sort_column = 0;
731a5359 2860 swallow ();
93484556
FCE
2861 }
2862
27f21e8c
DS
2863 t = peek ();
2864 if (tok_is(t, tok_keyword, "limit"))
2865 {
731a5359 2866 swallow (); // get past the "limit"
27f21e8c
DS
2867 s->limit = parse_expression ();
2868 }
2869
69c68955
FCE
2870 t = next ();
2871 if (! (t->type == tok_operator && t->content == ")"))
f0454224 2872 throw PARSE_ERROR ("expected ')'");
731a5359 2873 swallow ();
69c68955
FCE
2874
2875 s->block = parse_statement ();
2876 return s;
2877}
2878
2879
2f1a1aea
FCE
2880expression*
2881parser::parse_expression ()
2882{
2883 return parse_assignment ();
2884}
2885
2f1a1aea
FCE
2886
2887expression*
2888parser::parse_assignment ()
2889{
2890 expression* op1 = parse_ternary ();
2891
2892 const token* t = peek ();
82919855 2893 // right-associative operators
dff50e09 2894 if (t && t->type == tok_operator
2f1a1aea 2895 && (t->content == "=" ||
82919855 2896 t->content == "<<<" ||
2f1a1aea 2897 t->content == "+=" ||
bb2e3076
FCE
2898 t->content == "-=" ||
2899 t->content == "*=" ||
2900 t->content == "/=" ||
2901 t->content == "%=" ||
2902 t->content == "<<=" ||
2903 t->content == ">>=" ||
2904 t->content == "&=" ||
2905 t->content == "^=" ||
2906 t->content == "|=" ||
d5d7c2cc 2907 t->content == ".=" ||
dff50e09 2908 false))
2f1a1aea 2909 {
bb2e3076 2910 // NB: lvalueness is checked during elaboration / translation
2f1a1aea 2911 assignment* e = new assignment;
56099f08 2912 e->left = op1;
2f1a1aea 2913 e->op = t->content;
56099f08 2914 e->tok = t;
2f1a1aea 2915 next ();
82919855 2916 e->right = parse_expression ();
56099f08 2917 op1 = e;
2f1a1aea 2918 }
56099f08
FCE
2919
2920 return op1;
2f1a1aea
FCE
2921}
2922
2923
2924expression*
2925parser::parse_ternary ()
2926{
2927 expression* op1 = parse_logical_or ();
2928
2929 const token* t = peek ();
2930 if (t && t->type == tok_operator && t->content == "?")
2931 {
2f1a1aea 2932 ternary_expression* e = new ternary_expression;
56099f08 2933 e->tok = t;
2f1a1aea 2934 e->cond = op1;
56099f08
FCE
2935 next ();
2936 e->truevalue = parse_expression (); // XXX
2f1a1aea
FCE
2937
2938 t = next ();
2939 if (! (t->type == tok_operator && t->content == ":"))
f0454224 2940 throw PARSE_ERROR (_("expected ':'"));
731a5359 2941 swallow ();
2f1a1aea 2942
56099f08 2943 e->falsevalue = parse_expression (); // XXX
2f1a1aea
FCE
2944 return e;
2945 }
2946 else
2947 return op1;
2948}
2949
2950
2951expression*
2952parser::parse_logical_or ()
2953{
2954 expression* op1 = parse_logical_and ();
dff50e09 2955
2f1a1aea 2956 const token* t = peek ();
56099f08 2957 while (t && t->type == tok_operator && t->content == "||")
2f1a1aea 2958 {
2f1a1aea 2959 logical_or_expr* e = new logical_or_expr;
56099f08
FCE
2960 e->tok = t;
2961 e->op = t->content;
2f1a1aea 2962 e->left = op1;
56099f08
FCE
2963 next ();
2964 e->right = parse_logical_and ();
2965 op1 = e;
2966 t = peek ();
2f1a1aea 2967 }
56099f08
FCE
2968
2969 return op1;
2f1a1aea
FCE
2970}
2971
2972
2973expression*
2974parser::parse_logical_and ()
2975{
bb2e3076 2976 expression* op1 = parse_boolean_or ();
2f1a1aea
FCE
2977
2978 const token* t = peek ();
56099f08 2979 while (t && t->type == tok_operator && t->content == "&&")
2f1a1aea 2980 {
2f1a1aea
FCE
2981 logical_and_expr *e = new logical_and_expr;
2982 e->left = op1;
56099f08
FCE
2983 e->op = t->content;
2984 e->tok = t;
2985 next ();
bb2e3076
FCE
2986 e->right = parse_boolean_or ();
2987 op1 = e;
2988 t = peek ();
2989 }
2990
2991 return op1;
2992}
2993
2994
2995expression*
2996parser::parse_boolean_or ()
2997{
2998 expression* op1 = parse_boolean_xor ();
2999
3000 const token* t = peek ();
3001 while (t && t->type == tok_operator && t->content == "|")
3002 {
3003 binary_expression* e = new binary_expression;
3004 e->left = op1;
3005 e->op = t->content;
3006 e->tok = t;
3007 next ();
3008 e->right = parse_boolean_xor ();
3009 op1 = e;
3010 t = peek ();
3011 }
3012
3013 return op1;
3014}
3015
3016
3017expression*
3018parser::parse_boolean_xor ()
3019{
3020 expression* op1 = parse_boolean_and ();
3021
3022 const token* t = peek ();
3023 while (t && t->type == tok_operator && t->content == "^")
3024 {
3025 binary_expression* e = new binary_expression;
3026 e->left = op1;
3027 e->op = t->content;
3028 e->tok = t;
3029 next ();
3030 e->right = parse_boolean_and ();
3031 op1 = e;
3032 t = peek ();
3033 }
3034
3035 return op1;
3036}
3037
3038
3039expression*
3040parser::parse_boolean_and ()
3041{
3042 expression* op1 = parse_array_in ();
3043
3044 const token* t = peek ();
3045 while (t && t->type == tok_operator && t->content == "&")
3046 {
3047 binary_expression* e = new binary_expression;
3048 e->left = op1;
3049 e->op = t->content;
3050 e->tok = t;
3051 next ();
56099f08
FCE
3052 e->right = parse_array_in ();
3053 op1 = e;
3054 t = peek ();
2f1a1aea 3055 }
56099f08
FCE
3056
3057 return op1;
2f1a1aea
FCE
3058}
3059
3060
3061expression*
3062parser::parse_array_in ()
3063{
ce10591c 3064 // This is a very tricky case. All these are legit expressions:
69c68955 3065 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
ce10591c
FCE
3066 vector<expression*> indexes;
3067 bool parenthesized = false;
2f1a1aea
FCE
3068
3069 const token* t = peek ();
69c68955 3070 if (t && t->type == tok_operator && t->content == "[")
ce10591c 3071 {
731a5359 3072 swallow ();
ce10591c
FCE
3073 parenthesized = true;
3074 }
3075
3076 while (1)
3077 {
93daaca8 3078 expression* op1 = parse_comparison_or_regex_query ();
ce10591c
FCE
3079 indexes.push_back (op1);
3080
3081 if (parenthesized)
3082 {
3083 const token* t = peek ();
3084 if (t && t->type == tok_operator && t->content == ",")
3085 {
731a5359 3086 swallow ();
ce10591c
FCE
3087 continue;
3088 }
69c68955 3089 else if (t && t->type == tok_operator && t->content == "]")
ce10591c 3090 {
731a5359 3091 swallow ();
ce10591c
FCE
3092 break;
3093 }
dff50e09 3094 else
f0454224 3095 throw PARSE_ERROR (_("expected ',' or ']'"));
ce10591c
FCE
3096 }
3097 else
3098 break; // expecting only one expression
3099 }
3100
3101 t = peek ();
6e213f58 3102 if (t && t->type == tok_keyword && t->content == "in")
2f1a1aea 3103 {
2f1a1aea 3104 array_in *e = new array_in;
56099f08 3105 e->tok = t;
731a5359 3106 next ();
ce10591c
FCE
3107
3108 arrayindex* a = new arrayindex;
3109 a->indexes = indexes;
d02548c0 3110 a->base = parse_indexable();
d15d767c 3111 a->tok = a->base->tok;
ce10591c 3112 e->operand = a;
2f1a1aea
FCE
3113 return e;
3114 }
ce10591c
FCE
3115 else if (indexes.size() == 1) // no "in" - need one expression only
3116 return indexes[0];
2f1a1aea 3117 else
f0454224 3118 throw PARSE_ERROR (_("unexpected comma-separated expression list"));
2f1a1aea
FCE
3119}
3120
3121
3122expression*
93daaca8 3123parser::parse_comparison_or_regex_query ()
2f1a1aea 3124{
bb2e3076 3125 expression* op1 = parse_shift ();
2f1a1aea 3126
557abe61 3127 // XXX precedence -- perhaps a =~ b == c =~ d --> (a =~ b) == (c =~ d) ?
93daaca8
SM
3128 const token *t = peek();
3129 if (t && t->type == tok_operator
3130 && (t->content == "=~" ||
3131 t->content == "!~"))
3132 {
3133 regex_query* r = new regex_query;
3134 r->left = op1;
3135 r->op = t->content;
3136 r->tok = t;
3137 next ();
d3bc48f0 3138 r->right = parse_literal_string();
93daaca8
SM
3139 op1 = r;
3140 t = peek ();
3141 }
3142 else while (t && t->type == tok_operator
553d27a5
FCE
3143 && (t->content == ">" ||
3144 t->content == "<" ||
3145 t->content == "==" ||
3146 t->content == "!=" ||
3147 t->content == "<=" ||
bb2e3076 3148 t->content == ">="))
2f1a1aea
FCE
3149 {
3150 comparison* e = new comparison;
3151 e->left = op1;
3152 e->op = t->content;
56099f08 3153 e->tok = t;
2f1a1aea 3154 next ();
bb2e3076
FCE
3155 e->right = parse_shift ();
3156 op1 = e;
3157 t = peek ();
3158 }
3159
3160 return op1;
3161}
3162
3163
3164expression*
3165parser::parse_shift ()
3166{
3167 expression* op1 = parse_concatenation ();
3168
3169 const token* t = peek ();
dff50e09 3170 while (t && t->type == tok_operator &&
bb2e3076
FCE
3171 (t->content == "<<" || t->content == ">>"))
3172 {
3173 binary_expression* e = new binary_expression;
3174 e->left = op1;
3175 e->op = t->content;
3176 e->tok = t;
3177 next ();
56099f08
FCE
3178 e->right = parse_concatenation ();
3179 op1 = e;
3180 t = peek ();
2f1a1aea 3181 }
56099f08
FCE
3182
3183 return op1;
2f1a1aea
FCE
3184}
3185
3186
3187expression*
3188parser::parse_concatenation ()
3189{
3190 expression* op1 = parse_additive ();
3191
3192 const token* t = peek ();
3193 // XXX: the actual awk string-concatenation operator is *whitespace*.
3194 // I don't know how to easily to model that here.
56099f08 3195 while (t && t->type == tok_operator && t->content == ".")
2f1a1aea
FCE
3196 {
3197 concatenation* e = new concatenation;
3198 e->left = op1;
3199 e->op = t->content;
56099f08 3200 e->tok = t;
2f1a1aea 3201 next ();
56099f08
FCE
3202 e->right = parse_additive ();
3203 op1 = e;
3204 t = peek ();
2f1a1aea 3205 }
56099f08
FCE
3206
3207 return op1;
2f1a1aea
FCE
3208}
3209
3210
3211expression*
3212parser::parse_additive ()
3213{
3214 expression* op1 = parse_multiplicative ();
3215
3216 const token* t = peek ();
dff50e09 3217 while (t && t->type == tok_operator
2f1a1aea
FCE
3218 && (t->content == "+" || t->content == "-"))
3219 {
3220 binary_expression* e = new binary_expression;
3221 e->op = t->content;
3222 e->left = op1;
56099f08 3223 e->tok = t;
2f1a1aea 3224 next ();
56099f08
FCE
3225 e->right = parse_multiplicative ();
3226 op1 = e;
3227 t = peek ();
2f1a1aea 3228 }
56099f08
FCE
3229
3230 return op1;
2f1a1aea
FCE
3231}
3232
3233
3234expression*
3235parser::parse_multiplicative ()
3236{
3237 expression* op1 = parse_unary ();
3238
3239 const token* t = peek ();
dff50e09 3240 while (t && t->type == tok_operator
2f1a1aea
FCE
3241 && (t->content == "*" || t->content == "/" || t->content == "%"))
3242 {
3243 binary_expression* e = new binary_expression;
3244 e->op = t->content;
3245 e->left = op1;
56099f08 3246 e->tok = t;
2f1a1aea 3247 next ();
56099f08
FCE
3248 e->right = parse_unary ();
3249 op1 = e;
3250 t = peek ();
2f1a1aea 3251 }
56099f08
FCE
3252
3253 return op1;
2f1a1aea
FCE
3254}
3255
3256
3257expression*
3258parser::parse_unary ()
3259{
3260 const token* t = peek ();
dff50e09
FCE
3261 if (t && t->type == tok_operator
3262 && (t->content == "+" ||
3263 t->content == "-" ||
bb2e3076
FCE
3264 t->content == "!" ||
3265 t->content == "~" ||
3266 false))
2f1a1aea
FCE
3267 {
3268 unary_expression* e = new unary_expression;
3269 e->op = t->content;
56099f08 3270 e->tok = t;
2f1a1aea 3271 next ();
1cb79a72 3272 e->operand = parse_unary ();
2f1a1aea
FCE
3273 return e;
3274 }
3275 else
bb2e3076 3276 return parse_crement ();
2f1a1aea
FCE
3277}
3278
3279
3280expression*
3281parser::parse_crement () // as in "increment" / "decrement"
3282{
cbfbbf69
FCE
3283 // NB: Ideally, we'd parse only a symbol as an operand to the
3284 // *crement operators, instead of a general expression value. We'd
3285 // need more complex lookahead code to tell apart the postfix cases.
3286 // So we just punt, and leave it to pass-3 to signal errors on
3287 // cases like "4++".
3288
2f1a1aea 3289 const token* t = peek ();
dff50e09 3290 if (t && t->type == tok_operator
2f1a1aea
FCE
3291 && (t->content == "++" || t->content == "--"))
3292 {
3293 pre_crement* e = new pre_crement;
3294 e->op = t->content;
56099f08 3295 e->tok = t;
2f1a1aea
FCE
3296 next ();
3297 e->operand = parse_value ();
3298 return e;
3299 }
3300
3301 // post-crement or non-crement
3302 expression *op1 = parse_value ();
dff50e09 3303
2f1a1aea 3304 t = peek ();
dff50e09 3305 if (t && t->type == tok_operator
2f1a1aea
FCE
3306 && (t->content == "++" || t->content == "--"))
3307 {
3308 post_crement* e = new post_crement;
3309 e->op = t->content;
56099f08 3310 e->tok = t;
2f1a1aea
FCE
3311 next ();
3312 e->operand = op1;
3313 return e;
3314 }
3315 else
3316 return op1;
3317}
3318
3319
3320expression*
3321parser::parse_value ()
3322{
3323 const token* t = peek ();
3324 if (! t)
f0454224 3325 throw PARSE_ERROR (_("expected value"));
2f1a1aea 3326
7d902887
FCE
3327 if (t->type == tok_embedded)
3328 {
7d902887 3329 if (! privileged)
f0454224 3330 throw PARSE_ERROR (_("embedded expression code in unprivileged script; need stap -g"), false);
7d902887
FCE
3331
3332 embedded_expr *e = new embedded_expr;
3333 e->tok = t;
3334 e->code = t->content;
731a5359 3335 next ();
7d902887
FCE
3336 return e;
3337 }
3338
2f1a1aea
FCE
3339 if (t->type == tok_operator && t->content == "(")
3340 {
731a5359 3341 swallow ();
2f1a1aea
FCE
3342 expression* e = parse_expression ();
3343 t = next ();
3344 if (! (t->type == tok_operator && t->content == ")"))
f0454224 3345 throw PARSE_ERROR (_("expected ')'"));
731a5359 3346 swallow ();
2f1a1aea
FCE
3347 return e;
3348 }
03c75a4a
JS
3349 else if (t->type == tok_operator && t->content == "&")
3350 {
731a5359 3351 next (); // Cannot swallow, passing token on...
d48afc20 3352 return parse_target_symbol (t);
03c75a4a 3353 }
06219d6f
SM
3354 else if (t->type == tok_identifier
3355 || (t->type == tok_operator && t->content[0] == '@'))
2f1a1aea
FCE
3356 return parse_symbol ();
3357 else
3358 return parse_literal ();
3359}
3360
3361
d02548c0
GH
3362const token *
3363parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
3364{
3365 hop = NULL;
50cc7cd5 3366 const token* t = expect_ident_or_atword (name);
d02548c0
GH
3367 if (name == "@hist_linear" || name == "@hist_log")
3368 {
3369 hop = new hist_op;
3370 if (name == "@hist_linear")
3371 hop->htype = hist_linear;
3372 else if (name == "@hist_log")
3373 hop->htype = hist_log;
3374 hop->tok = t;
3375 expect_op("(");
3376 hop->stat = parse_expression ();
3377 int64_t tnum;
3378 if (hop->htype == hist_linear)
3379 {
3380 for (size_t i = 0; i < 3; ++i)
3381 {
3382 expect_op (",");
3383 expect_number (tnum);
3384 hop->params.push_back (tnum);
3385 }
3386 }
d02548c0
GH
3387 expect_op(")");
3388 }
3389 return t;
3390}
3391
3392
3393indexable*
3394parser::parse_indexable ()
3395{
3396 hist_op *hop = NULL;
3397 string name;
3398 const token *tok = parse_hist_op_or_bare_name(hop, name);
3399 if (hop)
3400 return hop;
3401 else
3402 {
3403 symbol* sym = new symbol;
3404 sym->name = name;
3405 sym->tok = tok;
3406 return sym;
3407 }
3408}
3409
3410
cc9001af
MW
3411// var, indexable[index], func(parms), printf("...", ...), $var,r
3412// @cast, @defined, @entry, @var, $var->member, @stat_op(stat)
30263a73 3413expression* parser::parse_symbol ()
2f1a1aea 3414{
d02548c0
GH
3415 hist_op *hop = NULL;
3416 symbol *sym = NULL;
d7f3e0c5 3417 string name;
d02548c0
GH
3418 const token *t = parse_hist_op_or_bare_name(hop, name);
3419
3420 if (!hop)
0fefb486 3421 {
dff50e09 3422 // If we didn't get a hist_op, then we did get an identifier. We can
d02548c0
GH
3423 // now scrutinize this identifier for the various magic forms of identifier
3424 // (printf, @stat_op, and $var...)
3425
cc9001af
MW
3426 if (name == "@cast"
3427 || name == "@var"
3428 || (name.size() > 0 && name[0] == '$'))
30263a73 3429 return parse_target_symbol (t);
9b5af295 3430
db135493
FCE
3431 // NB: PR11343: @defined() is not incompatible with earlier versions
3432 // of stap, so no need to check session.compatible for 1.2
30263a73
FCE
3433 if (name == "@defined")
3434 return parse_defined_op (t);
8cc799a5
JS
3435
3436 if (name == "@entry")
3437 return parse_entry_op (t);
3438
3689db05
SC
3439 if (name == "@perf")
3440 return parse_perf_op (t);
3441
cc9001af 3442 if (name.size() > 0 && name[0] == '@')
d7f3e0c5 3443 {
d02548c0
GH
3444 stat_op *sop = new stat_op;
3445 if (name == "@avg")
3446 sop->ctype = sc_average;
3447 else if (name == "@count")
3448 sop->ctype = sc_count;
3449 else if (name == "@sum")
3450 sop->ctype = sc_sum;
3451 else if (name == "@min")
3452 sop->ctype = sc_min;
3453 else if (name == "@max")
3454 sop->ctype = sc_max;
3455 else
f0454224 3456 throw PARSE_ERROR(_("unknown operator ") + name);
d02548c0
GH
3457 expect_op("(");
3458 sop->tok = t;
3459 sop->stat = parse_expression ();
3460 expect_op(")");
3461 return sop;
3462 }
dff50e09 3463
d5e178c1 3464 else if (print_format *fmt = print_format::create(t))
d02548c0 3465 {
d02548c0 3466 expect_op("(");
b15c465c
PP
3467 if ((name == "print" || name == "println" ||
3468 name == "sprint" || name == "sprintln") &&
f34254da 3469 (peek_op("@hist_linear") || peek_op("@hist_log")))
a4636912
GH
3470 {
3471 // We have a special case where we recognize
3472 // print(@hist_foo(bar)) as a magic print-the-histogram
3473 // construct. This is sort of gross but it avoids
3474 // promoting histogram references to typeful
3475 // expressions.
dff50e09 3476
1bbeef03
GH
3477 hop = NULL;
3478 t = parse_hist_op_or_bare_name(hop, name);
3479 assert(hop);
dff50e09 3480
1bbeef03
GH
3481 // It is, sadly, possible that even while parsing a
3482 // hist_op, we *mis-guessed* and the user wishes to
3483 // print(@hist_op(foo)[bucket]), a scalar. In that case
3484 // we must parse the arrayindex and print an expression.
839325a1
JS
3485 //
3486 // XXX: This still fails if the arrayindex is part of a
3487 // larger expression. To really handle everything, we'd
3488 // need to push back all the hist tokens start over.
dff50e09 3489
1bbeef03
GH
3490 if (!peek_op ("["))
3491 fmt->hist = hop;
3492 else
3493 {
3494 // This is simplified version of the
3495 // multi-array-index parser below, because we can
3496 // only ever have one index on a histogram anyways.
3497 expect_op("[");
3498 struct arrayindex* ai = new arrayindex;
3499 ai->tok = t;
3500 ai->base = hop;
3501 ai->indexes.push_back (parse_expression ());
3502 expect_op("]");
3503 fmt->args.push_back(ai);
839325a1
JS
3504
3505 // Consume any subsequent arguments.
3506 while (!peek_op (")"))
3507 {
3508 expect_op(",");
3509 expression *e = parse_expression ();
3510 fmt->args.push_back(e);
3511 }
1bbeef03 3512 }
a4636912 3513 }
d7f3e0c5 3514 else
d02548c0 3515 {
3cb17058 3516 int min_args = 0;
80cb29eb 3517 bool consumed_arg = false;
3cb17058
JS
3518 if (fmt->print_with_format)
3519 {
3520 // Consume and convert a format string. Agreement between the
3521 // format string and the arguments is postponed to the
3522 // typechecking phase.
3523 string tmp;
3524 expect_unknown (tok_string, tmp);
3525 fmt->raw_components = tmp;
3526 fmt->components = print_format::string_to_components (tmp);
80cb29eb 3527 consumed_arg = true;
3cb17058
JS
3528 }
3529 else if (fmt->print_with_delim)
3530 {
3531 // Consume a delimiter to separate arguments.
3532 fmt->delimiter.clear();
3533 fmt->delimiter.type = print_format::conv_literal;
3534 expect_unknown (tok_string, fmt->delimiter.literal_string);
80cb29eb
JL
3535 consumed_arg = true;
3536 min_args = 2; // so that the delim is used at least once
3cb17058 3537 }
80cb29eb 3538 else if (!fmt->print_with_newline)
3cb17058 3539 {
80cb29eb
JL
3540 // If we are not printing with a format string, nor with a
3541 // delim, nor with a newline, then it's either print() or
3542 // sprint(), both of which require at least one argument (of
3543 // any type).
3544 min_args = 1;
3cb17058
JS
3545 }
3546
3547 // Consume any subsequent arguments.
3548 while (min_args || !peek_op (")"))
3549 {
80cb29eb
JL
3550 if (consumed_arg)
3551 expect_op(",");
3cb17058
JS
3552 expression *e = parse_expression ();
3553 fmt->args.push_back(e);
80cb29eb 3554 consumed_arg = true;
3cb17058
JS
3555 if (min_args)
3556 --min_args;
3557 }
d02548c0
GH
3558 }
3559 expect_op(")");
3560 return fmt;
3561 }
dff50e09 3562
d02548c0
GH
3563 else if (peek_op ("(")) // function call
3564 {
731a5359 3565 swallow ();
d02548c0
GH
3566 struct functioncall* f = new functioncall;
3567 f->tok = t;
3568 f->function = name;
3569 // Allow empty actual parameter list
3570 if (peek_op (")"))
3571 {
731a5359 3572 swallow ();
d02548c0
GH
3573 return f;
3574 }
3575 while (1)
3576 {
3577 f->args.push_back (parse_expression ());
3578 if (peek_op (")"))
3579 {
731a5359 3580 swallow ();
d02548c0
GH
3581 break;
3582 }
3583 else if (peek_op (","))
3584 {
731a5359 3585 swallow ();
d02548c0
GH
3586 continue;
3587 }
3588 else
f0454224 3589 throw PARSE_ERROR (_("expected ',' or ')'"));
d02548c0
GH
3590 }
3591 return f;
3592 }
3593
3594 else
3595 {
3596 sym = new symbol;
3597 sym->name = name;
3598 sym->tok = t;
d7f3e0c5 3599 }
0fefb486 3600 }
dff50e09
FCE
3601
3602 // By now, either we had a hist_op in the first place, or else
d02548c0
GH
3603 // we had a plain word and it was converted to a symbol.
3604
70c743d8 3605 assert (!hop != !sym); // logical XOR
d02548c0
GH
3606
3607 // All that remains is to check for array indexing
3608
d7f3e0c5 3609 if (peek_op ("[")) // array
2f1a1aea 3610 {
731a5359 3611 swallow ();
2f1a1aea 3612 struct arrayindex* ai = new arrayindex;
d02548c0
GH
3613 ai->tok = t;
3614
3615 if (hop)
3616 ai->base = hop;
3617 else
3618 ai->base = sym;
3619
2f1a1aea
FCE
3620 while (1)
3621 {
3622 ai->indexes.push_back (parse_expression ());
d7f3e0c5 3623 if (peek_op ("]"))
dff50e09 3624 {
731a5359 3625 swallow ();
dff50e09 3626 break;
d7f3e0c5
GH
3627 }
3628 else if (peek_op (","))
3629 {
731a5359 3630 swallow ();
d7f3e0c5
GH
3631 continue;
3632 }
2f1a1aea 3633 else
f0454224 3634 throw PARSE_ERROR (_("expected ',' or ']'"));
2f1a1aea
FCE
3635 }
3636 return ai;
3637 }
d02548c0
GH
3638
3639 // If we got to here, we *should* have a symbol; if we have
3640 // a hist_op on its own, it doesn't count as an expression,
3641 // so we throw a parse error.
3642
3643 if (hop)
f0454224 3644 throw PARSE_ERROR(_("base histogram operator where expression expected"), t);
dff50e09
FCE
3645
3646 return sym;
2f1a1aea 3647}
56099f08 3648
30263a73
FCE
3649// Parse a @cast or $var. Given head token has already been consumed.
3650target_symbol* parser::parse_target_symbol (const token* t)
3651{
d48afc20
JS
3652 bool addressof = false;
3653 if (t->type == tok_operator && t->content == "&")
3654 {
3655 addressof = true;
3819d181
MW
3656 // Don't delete t before trying next token.
3657 // We might need it in the error message when there is no next token.
3658 const token *next_t = next ();
731a5359 3659 delete t;
3819d181 3660 t = next_t;
d48afc20
JS
3661 }
3662
06219d6f 3663 if (t->type == tok_operator && t->content == "@cast")
30263a73
FCE
3664 {
3665 cast_op *cop = new cast_op;
3666 cop->tok = t;
277c21bc 3667 cop->name = t->content;
30263a73
FCE
3668 expect_op("(");
3669 cop->operand = parse_expression ();
3670 expect_op(",");
7f6b80bd 3671 expect_unknown(tok_string, cop->type_name);
30263a73
FCE
3672 if (peek_op (","))
3673 {
731a5359 3674 swallow ();
30263a73
FCE
3675 expect_unknown(tok_string, cop->module);
3676 }
3677 expect_op(")");
3678 parse_target_symbol_components(cop);
d48afc20 3679 cop->addressof = addressof;
30263a73
FCE
3680 return cop;
3681 }
3682
3683 if (t->type == tok_identifier && t->content[0]=='$')
3684 {
3685 // target_symbol time
3686 target_symbol *tsym = new target_symbol;
3687 tsym->tok = t;
277c21bc 3688 tsym->name = t->content;
30263a73 3689 parse_target_symbol_components(tsym);
d48afc20 3690 tsym->addressof = addressof;
30263a73
FCE
3691 return tsym;
3692 }
3693
06219d6f 3694 if (t->type == tok_operator && t->content == "@var")
cc9001af 3695 {
bd1fcbad
YZ
3696 atvar_op *aop = new atvar_op;
3697 aop->tok = t;
3698 aop->name = t->content;
cc9001af 3699 expect_op("(");
bd1fcbad
YZ
3700 expect_unknown(tok_string, aop->target_name);
3701 size_t found_at = aop->target_name.find("@");
bfa7e523 3702 if (found_at != string::npos)
bd1fcbad 3703 aop->cu_name = aop->target_name.substr(found_at + 1);
bfa7e523 3704 else
bd1fcbad
YZ
3705 aop->cu_name = "";
3706 if (peek_op (","))
3707 {
3708 swallow ();
3709 expect_unknown (tok_string, aop->module);
3710 }
3711 else
3712 aop->module = "";
cc9001af 3713 expect_op(")");
bd1fcbad
YZ
3714 parse_target_symbol_components(aop);
3715 aop->addressof = addressof;
3716 return aop;
cc9001af
MW
3717 }
3718
f0454224 3719 throw PARSE_ERROR (_("expected @cast, @var or $var"));
30263a73
FCE
3720}
3721
3722
3723// Parse a @defined(). Given head token has already been consumed.
3724expression* parser::parse_defined_op (const token* t)
3725{
3726 defined_op* dop = new defined_op;
3727 dop->tok = t;
3728 expect_op("(");
30263a73 3729 // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand.
d48afc20 3730 const token* tt = next ();
30263a73
FCE
3731 dop->operand = parse_target_symbol (tt);
3732 expect_op(")");
3733 return dop;
3734}
3735
3736
8cc799a5
JS
3737// Parse a @entry(). Given head token has already been consumed.
3738expression* parser::parse_entry_op (const token* t)
3739{
3740 entry_op* eop = new entry_op;
3741 eop->tok = t;
3742 expect_op("(");
3743 eop->operand = parse_expression ();
3744 expect_op(")");
3745 return eop;
3746}
3747
3748
3689db05
SC
3749// Parse a @perf(). Given head token has already been consumed.
3750expression* parser::parse_perf_op (const token* t)
3751{
3752 perf_op* pop = new perf_op;
3753 pop->tok = t;
3754 expect_op("(");
ace7c23f
FCE
3755 pop->operand = parse_literal_string ();
3756 if (pop->operand->value == "")
f0454224 3757 throw PARSE_ERROR (_("expected non-empty string"));
3689db05
SC
3758 expect_op(")");
3759 return pop;
3760}
3761
3762
30263a73 3763
81931eab
JS
3764void
3765parser::parse_target_symbol_components (target_symbol* e)
3766{
5f36109e
JS
3767 bool pprint = false;
3768
3769 // check for pretty-print in the form $foo$
277c21bc 3770 string &base = e->name;
5f36109e
JS
3771 size_t pprint_pos = base.find_last_not_of('$');
3772 if (0 < pprint_pos && pprint_pos < base.length() - 1)
3773 {
3774 string pprint_val = base.substr(pprint_pos + 1);
3775 base.erase(pprint_pos + 1);
3776 e->components.push_back (target_symbol::component(e->tok, pprint_val, true));
3777 pprint = true;
3778 }
3779
3780 while (!pprint)
81931eab 3781 {
81931eab
JS
3782 if (peek_op ("->"))
3783 {
c67847a0
JS
3784 const token* t = next();
3785 string member;
3786 expect_ident_or_keyword (member);
5f36109e
JS
3787
3788 // check for pretty-print in the form $foo->$ or $foo->bar$
3789 pprint_pos = member.find_last_not_of('$');
3790 string pprint_val;
3791 if (pprint_pos == string::npos || pprint_pos < member.length() - 1)
3792 {
3793 pprint_val = member.substr(pprint_pos + 1);
3794 member.erase(pprint_pos + 1);
3795 pprint = true;
3796 }
3797
3798 if (!member.empty())
3799 e->components.push_back (target_symbol::component(t, member));
3800 if (pprint)
3801 e->components.push_back (target_symbol::component(t, pprint_val, true));
81931eab
JS
3802 }
3803 else if (peek_op ("["))
3804 {
c67847a0 3805 const token* t = next();
6fda2dff
JS
3806 expression* index = parse_expression();
3807 literal_number* ln = dynamic_cast<literal_number*>(index);
3808 if (ln)
3809 e->components.push_back (target_symbol::component(t, ln->value));
3810 else
3811 e->components.push_back (target_symbol::component(t, index));
81931eab 3812 expect_op ("]");
81931eab
JS
3813 }
3814 else
3815 break;
3816 }
5f36109e
JS
3817
3818 if (!pprint)
3819 {
3820 // check for pretty-print in the form $foo $
3821 // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$
3822 const token* t = peek();
3819d181 3823 if (t != NULL && t->type == tok_identifier &&
5f36109e
JS
3824 t->content.find_first_not_of('$') == string::npos)
3825 {
3826 t = next();
3827 e->components.push_back (target_symbol::component(t, t->content, true));
3828 pprint = true;
3829 }
3830 }
3831
3832 if (pprint && (peek_op ("->") || peek_op("[")))
f0454224 3833 throw PARSE_ERROR(_("-> and [ are not accepted for a pretty-printing variable"));
81931eab
JS
3834}
3835
73267b89 3836/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.728259 seconds and 5 git commands to generate.