]>
Commit | Line | Data |
---|---|---|
2f1a1aea | 1 | // recursive descent parser for systemtap scripts |
f4fe2e93 | 2 | // Copyright (C) 2005-2010 Red Hat Inc. |
77a5c1f9 | 3 | // Copyright (C) 2006 Intel Corporation. |
5811366a | 4 | // Copyright (C) 2007 Bull S.A.S |
69c68955 FCE |
5 | // |
6 | // This file is part of systemtap, and is free software. You can | |
7 | // redistribute it and/or modify it under the terms of the GNU General | |
8 | // Public License (GPL); either version 2, or (at your option) any | |
9 | // later version. | |
2f1a1aea | 10 | |
2b066ec1 | 11 | #include "config.h" |
2f1a1aea FCE |
12 | #include "staptree.h" |
13 | #include "parse.h" | |
177a8ead | 14 | #include "session.h" |
3f99432c FCE |
15 | #include "util.h" |
16 | ||
2b066ec1 | 17 | #include <iostream> |
eacb10ce | 18 | |
2b066ec1 | 19 | #include <fstream> |
2f1a1aea | 20 | #include <cctype> |
9c0c0e46 | 21 | #include <cstdlib> |
29e64872 | 22 | #include <cassert> |
9c0c0e46 FCE |
23 | #include <cerrno> |
24 | #include <climits> | |
57b73400 | 25 | #include <sstream> |
f74fb737 | 26 | #include <cstring> |
3f99432c | 27 | #include <cctype> |
eacb10ce FCE |
28 | #include <iterator> |
29 | ||
7a468d68 FCE |
30 | extern "C" { |
31 | #include <fnmatch.h> | |
32 | } | |
2f1a1aea FCE |
33 | |
34 | using namespace std; | |
35 | ||
c18f07f8 JS |
36 | |
// Tokenizer for script text.  The constructor reads the input stream
// into input_contents; scan() then hands out one heap-allocated token
// per call.
class lexer
{
public:
  bool ate_comment; // the most recent token followed a comment
  // Return the next token, or 0 once the input is exhausted.
  token* scan (bool wildcard=false);
  lexer (istream&, const string&, systemtap_session&);
  // Remember the stapfile that subsequently scanned tokens belong to,
  // and copy the input text and name into it.
  void set_current_file (stapfile* f);

private:
  inline int input_get ();              // consume one character; negative at EOF
  inline int input_peek (unsigned n=0); // look n characters ahead; negative at EOF
  // Splice text into the input at the current read position.
  void input_put (const string&, const token*);
  string input_name;
  string input_contents;
  const char *input_pointer; // index into input_contents
  const char *input_end;
  // While cursor_suspend_count is nonzero, input_get() does not advance
  // the cursor; the suspend_line/column pair is restored when the count
  // reaches zero (see input_put / input_get).
  unsigned cursor_suspend_count;
  unsigned cursor_suspend_line;
  unsigned cursor_suspend_column;
  unsigned cursor_line;
  unsigned cursor_column;
  systemtap_session& session;
  stapfile* current_file;
  static set<string> keywords; // filled in by the first lexer constructed
};
62 | ||
63 | ||
// Recursive descent parser.  Tokens are pulled from the embedded lexer
// through scan_pp(), which applies %( ... %) preprocessing first.
class parser
{
public:
  parser (systemtap_session& s, istream& i, bool p);
  parser (systemtap_session& s, const string& n, bool p);
  ~parser ();

  stapfile* parse ();

private:
  // State of one open %( ... %) conditional: which branch we are in and
  // whether its tokens are kept or skipped.
  typedef enum {
    PP_NONE,
    PP_KEEP_THEN,
    PP_SKIP_THEN,
    PP_KEEP_ELSE,
    PP_SKIP_ELSE,
  } pp_state_t;

  systemtap_session& session;
  string input_name;
  istream* free_input; // stream owned by the parser (deleted in ~parser), or 0
  lexer input;
  bool privileged;
  parse_context context;

  // preprocessing subordinate
  // Stack of open conditionals: the opening '%(' token and its state.
  vector<pair<const token*, pp_state_t> > pp_state;
  const token* scan_pp (bool wildcard=false);
  const token* skip_pp ();

  // scanning state
  const token* last ();
  const token* next (bool wildcard=false);
  const token* peek (bool wildcard=false);

  const token* last_t; // the last value returned by peek() or next()
  const token* next_t; // lookahead token

  // expectations
  // Each of these consumes one token via next() and throws parse_error
  // if it does not match.
  const token* expect_known (token_type tt, string const & expected);
  const token* expect_unknown (token_type tt, string & target);
  const token* expect_unknown2 (token_type tt1, token_type tt2,
                                string & target);

  // convenience forms
  const token* expect_op (string const & expected);
  const token* expect_kw (string const & expected);
  const token* expect_number (int64_t & expected);
  const token* expect_ident (string & target);
  const token* expect_ident_or_keyword (string & target);
  bool peek_op (string const & op);
  bool peek_kw (string const & kw);

  // Report one parse error on cerr and count it in num_errors.
  void print_error (const parse_error& pe);
  unsigned num_errors;

private: // nonterminals
  void parse_probe (vector<probe*>&, vector<probe_alias*>&);
  void parse_global (vector<vardecl*>&, vector<probe*>&);
  void parse_functiondecl (vector<functiondecl*>&);
  embeddedcode* parse_embeddedcode ();
  probe_point* parse_probe_point ();
  literal* parse_literal ();
  block* parse_stmt_block ();
  try_block* parse_try_block ();
  statement* parse_statement ();
  if_statement* parse_if_statement ();
  for_loop* parse_for_loop ();
  for_loop* parse_while_loop ();
  foreach_loop* parse_foreach_loop ();
  expr_statement* parse_expr_statement ();
  return_statement* parse_return_statement ();
  delete_statement* parse_delete_statement ();
  next_statement* parse_next_statement ();
  break_statement* parse_break_statement ();
  continue_statement* parse_continue_statement ();
  indexable* parse_indexable ();
  const token *parse_hist_op_or_bare_name (hist_op *&hop, string &name);
  target_symbol *parse_target_symbol (const token* t);
  expression* parse_defined_op (const token* t);
  expression* parse_expression ();
  expression* parse_assignment ();
  expression* parse_ternary ();
  expression* parse_logical_or ();
  expression* parse_logical_and ();
  expression* parse_boolean_or ();
  expression* parse_boolean_xor ();
  expression* parse_boolean_and ();
  expression* parse_array_in ();
  expression* parse_comparison ();
  expression* parse_shift ();
  expression* parse_concatenation ();
  expression* parse_additive ();
  expression* parse_multiplicative ();
  expression* parse_unary ();
  expression* parse_crement ();
  expression* parse_value ();
  expression* parse_symbol ();

  void parse_target_symbol_components (target_symbol* e);
};
165 | ||
166 | ||
2f1a1aea FCE |
167 | // ------------------------------------------------------------------------ |
168 | ||
c18f07f8 JS |
169 | stapfile* |
170 | parse (systemtap_session& s, istream& i, bool pr) | |
171 | { | |
172 | parser p (s, i, pr); | |
173 | return p.parse (); | |
174 | } | |
175 | ||
176 | ||
177 | stapfile* | |
178 | parse (systemtap_session& s, const string& n, bool pr) | |
179 | { | |
180 | parser p (s, n, pr); | |
181 | return p.parse (); | |
182 | } | |
183 | ||
184 | // ------------------------------------------------------------------------ | |
bb2e3076 FCE |
185 | |
186 | ||
177a8ead FCE |
187 | parser::parser (systemtap_session& s, istream& i, bool p): |
188 | session (s), | |
24cb178f | 189 | input_name ("<input>"), free_input (0), |
213bee8f | 190 | input (i, input_name, s), privileged (p), |
6e213f58 | 191 | context(con_unknown), last_t (0), next_t (0), num_errors (0) |
2f1a1aea FCE |
192 | { } |
193 | ||
177a8ead FCE |
194 | parser::parser (systemtap_session& s, const string& fn, bool p): |
195 | session (s), | |
2f1a1aea | 196 | input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)), |
213bee8f | 197 | input (* free_input, input_name, s), privileged (p), |
6e213f58 | 198 | context(con_unknown), last_t (0), next_t (0), num_errors (0) |
2f1a1aea FCE |
199 | { } |
200 | ||
201 | parser::~parser() | |
202 | { | |
203 | if (free_input) delete free_input; | |
204 | } | |
205 | ||
d7f3e0c5 GH |
206 | static string |
207 | tt2str(token_type tt) | |
208 | { | |
209 | switch (tt) | |
210 | { | |
211 | case tok_junk: return "junk"; | |
212 | case tok_identifier: return "identifier"; | |
213 | case tok_operator: return "operator"; | |
214 | case tok_string: return "string"; | |
215 | case tok_number: return "number"; | |
216 | case tok_embedded: return "embedded-code"; | |
6e213f58 | 217 | case tok_keyword: return "keyword"; |
d7f3e0c5 GH |
218 | } |
219 | return "unknown token"; | |
220 | } | |
82919855 | 221 | |
0323ed4d WC |
222 | ostream& |
223 | operator << (ostream& o, const source_loc& loc) | |
224 | { | |
a704a23b | 225 | o << loc.file->name << ":" |
0323ed4d WC |
226 | << loc.line << ":" |
227 | << loc.column; | |
228 | ||
229 | return o; | |
230 | } | |
231 | ||
56099f08 FCE |
232 | ostream& |
233 | operator << (ostream& o, const token& t) | |
234 | { | |
d7f3e0c5 | 235 | o << tt2str(t.type); |
56099f08 | 236 | |
6e213f58 | 237 | if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types? |
56099f08 | 238 | { |
24cb178f FCE |
239 | o << " '"; |
240 | for (unsigned i=0; i<t.content.length(); i++) | |
241 | { | |
242 | char c = t.content[i]; | |
243 | o << (isprint (c) ? c : '?'); | |
244 | } | |
245 | o << "'"; | |
56099f08 | 246 | } |
56099f08 | 247 | |
dff50e09 | 248 | o << " at " |
0323ed4d | 249 | << t.location; |
56099f08 FCE |
250 | |
251 | return o; | |
252 | } | |
253 | ||
254 | ||
dff50e09 | 255 | void |
2f1a1aea FCE |
256 | parser::print_error (const parse_error &pe) |
257 | { | |
1b1b4ceb | 258 | string align_parse_error (" "); |
2f1a1aea FCE |
259 | cerr << "parse error: " << pe.what () << endl; |
260 | ||
177a8ead FCE |
261 | if (pe.tok) |
262 | { | |
263 | cerr << "\tat: " << *pe.tok << endl; | |
1b1b4ceb | 264 | session.print_error_source (cerr, align_parse_error, pe.tok); |
177a8ead | 265 | } |
2f1a1aea | 266 | else |
177a8ead FCE |
267 | { |
268 | const token* t = last_t; | |
269 | if (t) | |
1b1b4ceb RA |
270 | { |
271 | cerr << "\tsaw: " << *t << endl; | |
272 | session.print_error_source (cerr, align_parse_error, t); | |
273 | } | |
177a8ead FCE |
274 | else |
275 | cerr << "\tsaw: " << input_name << " EOF" << endl; | |
276 | } | |
2f1a1aea FCE |
277 | |
278 | // XXX: make it possible to print the last input line, | |
279 | // so as to line up an arrow with the specific error column | |
280 | ||
281 | num_errors ++; | |
282 | } | |
283 | ||
284 | ||
dff50e09 | 285 | const token* |
2f1a1aea FCE |
286 | parser::last () |
287 | { | |
288 | return last_t; | |
289 | } | |
290 | ||
291 | ||
c434ec7e FCE |
292 | |
293 | template <typename OPERAND> | |
294 | bool eval_comparison (const OPERAND& lhs, const token* op, const OPERAND& rhs) | |
295 | { | |
296 | if (op->type == tok_operator && op->content == "<=") | |
297 | { return lhs <= rhs; } | |
298 | else if (op->type == tok_operator && op->content == ">=") | |
299 | { return lhs >= rhs; } | |
300 | else if (op->type == tok_operator && op->content == "<") | |
301 | { return lhs < rhs; } | |
302 | else if (op->type == tok_operator && op->content == ">") | |
303 | { return lhs > rhs; } | |
304 | else if (op->type == tok_operator && op->content == "==") | |
305 | { return lhs == rhs; } | |
306 | else if (op->type == tok_operator && op->content == "!=") | |
307 | { return lhs != rhs; } | |
308 | else | |
309 | throw parse_error ("expected comparison operator", op); | |
310 | } | |
311 | ||
312 | ||
177a8ead FCE |
// Here, we perform on-the-fly preprocessing.
// The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
// where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
//                 or: arch COMPARISON-OP "arch-string"
//                 or: systemtap_v COMPARISON-OP "version-string"
//                 or: CONFIG_foo COMPARISON-OP "config-string"
//                 or: CONFIG_foo COMPARISON-OP number
//                 or: CONFIG_foo COMPARISON-OP CONFIG_bar
//                 or: "string1" COMPARISON-OP "string2"
//                 or: number1 COMPARISON-OP number2
// The %: ELSE-TOKENS part is optional.
//
// e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
// e.g. %( arch != "i?86" %? "foo" %: "baz" %)
// e.g. %( CONFIG_foo %? "foo" %: "baz" %)
//
// This function evaluates a single CONDITION, given as its three
// tokens: left operand l, operator op and right operand r.  It returns
// the truth value of the comparison and throws parse_error (attached to
// the offending token) when the operands or operator are not one of the
// forms above.  The surrounding %( ... %? ... %) structure, including
// && / || chains of conditions, is handled by parser::scan_pp.

bool eval_pp_conditional (systemtap_session& s,
                          const token* l, const token* op, const token* r)
{
  // Version comparisons: kernel_v / kernel_vr / systemtap_v against a
  // string literal.
  if (l->type == tok_identifier && (l->content == "kernel_v" ||
                                    l->content == "kernel_vr" ||
                                    l->content == "systemtap_v"))
    {
      if (! (r->type == tok_string))
        throw parse_error ("expected string literal", r);

      string target_kernel_vr = s.kernel_release;
      string target_kernel_v = s.kernel_base_release;
      string target;

      if (l->content == "kernel_v") target = target_kernel_v;
      else if (l->content == "kernel_vr") target = target_kernel_vr;
      else if (l->content == "systemtap_v") target = s.compatible;
      else assert (0);

      string query = r->content;
      // Any of '*', '?' or '[' in the literal makes it a glob pattern.
      bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);

      // collect acceptable strverscmp results.
      // (after normalization to -1 / 0 / 1 below)
      int rvc_ok1, rvc_ok2;
      bool wc_ok = false; // only == and != may be used with a glob pattern
      if (op->type == tok_operator && op->content == "<=")
        { rvc_ok1 = -1; rvc_ok2 = 0; }
      else if (op->type == tok_operator && op->content == ">=")
        { rvc_ok1 = 1; rvc_ok2 = 0; }
      else if (op->type == tok_operator && op->content == "<")
        { rvc_ok1 = -1; rvc_ok2 = -1; }
      else if (op->type == tok_operator && op->content == ">")
        { rvc_ok1 = 1; rvc_ok2 = 1; }
      else if (op->type == tok_operator && op->content == "==")
        { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
      else if (op->type == tok_operator && op->content == "!=")
        { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
      else
        throw parse_error ("expected comparison operator", op);

      if ((!wc_ok) && rhs_wildcard)
        throw parse_error ("wildcard not allowed with order comparison operators", op);

      if (rhs_wildcard)
        {
          int rvc_result = fnmatch (query.c_str(), target.c_str(),
                                    FNM_NOESCAPE); // spooky
          // fnmatch returns 0 on a match; the result is "bad" when the
          // match outcome disagrees with the operator (== wants a
          // match, != wants a mismatch).
          bool badness = (rvc_result == 0) ^ (op->content == "==");
          return !badness;
        }
      else
        {
          int rvc_result = strverscmp (target.c_str(), query.c_str());
          // normalize rvc_result
          if (rvc_result < 0) rvc_result = -1;
          if (rvc_result > 0) rvc_result = 1;
          return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
        }
    }
  // Architecture comparison: always a glob match, == or != only.
  else if (l->type == tok_identifier && l->content == "arch")
    {
      string target_architecture = s.architecture;
      if (! (r->type == tok_string))
        throw parse_error ("expected string literal", r);
      string query_architecture = r->content;

      int nomatch = fnmatch (query_architecture.c_str(),
                             target_architecture.c_str(),
                             FNM_NOESCAPE); // still spooky

      bool result;
      if (op->type == tok_operator && op->content == "==")
        result = !nomatch;
      else if (op->type == tok_operator && op->content == "!=")
        result = nomatch;
      else
        throw parse_error ("expected '==' or '!='", op);

      return result;
    }
  // Kernel configuration option on the left; the right operand decides
  // whether this is a glob, numeric or option-to-option comparison.
  else if (l->type == tok_identifier && startswith(l->content, "CONFIG_"))
    {
      if (r->type == tok_string)
        {
          string lhs = s.kernel_config[l->content]; // may be empty
          string rhs = r->content;

          int nomatch = fnmatch (rhs.c_str(), lhs.c_str(), FNM_NOESCAPE); // still spooky

          bool result;
          if (op->type == tok_operator && op->content == "==")
            result = !nomatch;
          else if (op->type == tok_operator && op->content == "!=")
            result = nomatch;
          else
            throw parse_error ("expected '==' or '!='", op);

          return result;
        }
      else if (r->type == tok_number)
        {
          // The option's value must parse completely as an integer
          // (base auto-detected by strtoll).
          const char* startp = s.kernel_config[l->content].c_str ();
          char* endp = (char*) startp;
          errno = 0;
          int64_t lhs = (int64_t) strtoll (startp, & endp, 0);
          if (errno == ERANGE || errno == EINVAL || *endp != '\0')
            throw parse_error ("Config option value not a number", l);

          int64_t rhs = lex_cast<int64_t>(r->content);
          return eval_comparison (lhs, op, rhs);
        }
      else if (r->type == tok_identifier
               && startswith(r->content, "CONFIG_"))
        {
          // First try to convert both to numbers,
          // otherwise treat both as strings.
          const char* startp = s.kernel_config[l->content].c_str ();
          char* endp = (char*) startp;
          errno = 0;
          int64_t val = (int64_t) strtoll (startp, & endp, 0);
          if (errno != ERANGE && errno != EINVAL && *endp == '\0')
            {
              int64_t lhs = val;
              startp = s.kernel_config[r->content].c_str ();
              endp = (char*) startp;
              errno = 0;
              int64_t rhs = (int64_t) strtoll (startp, & endp, 0);
              if (errno != ERANGE && errno != EINVAL && *endp == '\0')
                return eval_comparison (lhs, op, rhs);
            }

          // At least one side is not numeric: compare as strings.
          string lhs = s.kernel_config[l->content];
          string rhs = s.kernel_config[r->content];
          return eval_comparison (lhs, op, rhs);
        }
      else
        throw parse_error ("expected string, number literal or other CONFIG_... as right value", r);
    }
  else if (l->type == tok_string && r->type == tok_string)
    {
      string lhs = l->content;
      string rhs = r->content;
      return eval_comparison (lhs, op, rhs);
      // NB: no wildcarding option here
    }
  else if (l->type == tok_number && r->type == tok_number)
    {
      int64_t lhs = lex_cast<int64_t>(l->content);
      int64_t rhs = lex_cast<int64_t>(r->content);
      return eval_comparison (lhs, op, rhs);
      // NB: no wildcarding option here
    }
  // Mixed literal types: report which kind the right operand should be.
  else if (l->type == tok_string && r->type == tok_number
           && op->type == tok_operator)
    throw parse_error ("expected string literal as right value", r);
  else if (l->type == tok_number && r->type == tok_string
           && op->type == tok_operator)
    throw parse_error ("expected number literal as right value", r);

  else
    throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr' or 'CONFIG_...'\n"
                       " or comparison between strings or integers", l);
}
496 | ||
497 | ||
5811366a | 498 | // Only tokens corresponding to the TRUE statement must be expanded |
177a8ead | 499 | const token* |
3f847830 | 500 | parser::scan_pp (bool wildcard) |
177a8ead FCE |
501 | { |
502 | while (true) | |
503 | { | |
e92f2566 JS |
504 | pp_state_t pp = PP_NONE; |
505 | if (!pp_state.empty()) | |
506 | pp = pp_state.back().second; | |
507 | ||
508 | const token* t = 0; | |
509 | if (pp == PP_SKIP_THEN || pp == PP_SKIP_ELSE) | |
510 | t = skip_pp (); | |
511 | else | |
512 | t = input.scan (wildcard); | |
513 | ||
514 | if (t == 0) // EOF | |
177a8ead | 515 | { |
e92f2566 JS |
516 | if (pp != PP_NONE) |
517 | { | |
518 | t = pp_state.back().first; | |
519 | pp_state.pop_back(); // so skip_some doesn't keep trying to close this | |
520 | throw parse_error ("incomplete conditional at end of file", t); | |
521 | } | |
177a8ead FCE |
522 | return t; |
523 | } | |
524 | ||
e92f2566 JS |
525 | // misplaced preprocessor "then" |
526 | if (t->type == tok_operator && t->content == "%?") | |
527 | throw parse_error ("incomplete conditional - missing '%('", t); | |
528 | ||
529 | // preprocessor "else" | |
530 | if (t->type == tok_operator && t->content == "%:") | |
531 | { | |
532 | if (pp == PP_NONE) | |
533 | throw parse_error ("incomplete conditional - missing '%('", t); | |
534 | if (pp == PP_KEEP_ELSE || pp == PP_SKIP_ELSE) | |
535 | throw parse_error ("invalid conditional - duplicate '%:'", t); | |
536 | ||
537 | pp_state.back().second = (pp == PP_KEEP_THEN) ? | |
538 | PP_SKIP_ELSE : PP_KEEP_ELSE; | |
539 | delete t; | |
540 | continue; | |
541 | } | |
542 | ||
543 | // preprocessor close | |
544 | if (t->type == tok_operator && t->content == "%)") | |
545 | { | |
546 | if (pp == PP_NONE) | |
547 | throw parse_error ("incomplete conditional - missing '%('", t); | |
548 | delete pp_state.back().first; | |
549 | delete t; | |
550 | pp_state.pop_back(); | |
551 | continue; | |
552 | } | |
dff50e09 | 553 | |
177a8ead FCE |
554 | if (! (t->type == tok_operator && t->content == "%(")) // ordinary token |
555 | return t; | |
556 | ||
557 | // We have a %( - it's time to throw a preprocessing party! | |
558 | ||
2d7881bf PP |
559 | bool result = false; |
560 | bool and_result = true; | |
561 | const token *n = NULL; | |
562 | do { | |
563 | const token *l, *op, *r; | |
e92f2566 | 564 | l = input.scan (false); |
2d7881bf PP |
565 | op = input.scan (false); |
566 | r = input.scan (false); | |
567 | if (l == 0 || op == 0 || r == 0) | |
568 | throw parse_error ("incomplete condition after '%('", t); | |
569 | // NB: consider generalizing to consume all tokens until %?, and | |
570 | // passing that as a vector to an evaluator. | |
571 | ||
572 | // Do not evaluate the condition if we haven't expanded everything. | |
573 | // This may occur when having several recursive conditionals. | |
574 | and_result &= eval_pp_conditional (session, l, op, r); | |
575 | delete l; | |
576 | delete op; | |
577 | delete r; | |
578 | delete n; | |
579 | ||
580 | n = input.scan (); | |
581 | if (n && n->type == tok_operator && n->content == "&&") | |
582 | continue; | |
583 | result |= and_result; | |
584 | and_result = true; | |
585 | if (! (n && n->type == tok_operator && n->content == "||")) | |
586 | break; | |
587 | } while (true); | |
3f847830 FCE |
588 | |
589 | /* | |
590 | clog << "PP eval (" << *t << ") == " << result << endl; | |
591 | */ | |
592 | ||
e92f2566 | 593 | const token *m = n; |
177a8ead FCE |
594 | if (! (m && m->type == tok_operator && m->content == "%?")) |
595 | throw parse_error ("expected '%?' marker for conditional", t); | |
70c743d8 | 596 | delete m; // "%?" |
177a8ead | 597 | |
e92f2566 JS |
598 | pp = result ? PP_KEEP_THEN : PP_SKIP_THEN; |
599 | pp_state.push_back (make_pair (t, pp)); | |
3f847830 | 600 | |
e92f2566 JS |
601 | // Now loop around to look for a real token. |
602 | } | |
603 | } | |
3f847830 | 604 | |
3f847830 | 605 | |
e92f2566 JS |
606 | // Skip over tokens and any errors, heeding |
607 | // only nested preprocessor starts and ends. | |
608 | const token* | |
609 | parser::skip_pp () | |
610 | { | |
611 | const token* t = 0; | |
612 | unsigned nesting = 0; | |
613 | do | |
614 | { | |
615 | try | |
616 | { | |
617 | t = input.scan (); | |
177a8ead | 618 | } |
e92f2566 | 619 | catch (const parse_error &e) |
70c743d8 | 620 | { |
e92f2566 | 621 | continue; |
70c743d8 | 622 | } |
e92f2566 JS |
623 | if (!t) |
624 | break; | |
625 | if (t->type == tok_operator && t->content == "%(") | |
626 | ++nesting; | |
627 | else if (nesting && t->type == tok_operator && t->content == "%)") | |
628 | --nesting; | |
629 | else if (!nesting && t->type == tok_operator && | |
630 | (t->content == "%:" || t->content == "%?" || t->content == "%)")) | |
631 | break; | |
632 | delete t; | |
177a8ead | 633 | } |
e92f2566 JS |
634 | while (true); |
635 | return t; | |
177a8ead FCE |
636 | } |
637 | ||
638 | ||
2f1a1aea | 639 | const token* |
0c218afb | 640 | parser::next (bool wildcard) |
2f1a1aea FCE |
641 | { |
642 | if (! next_t) | |
0c218afb | 643 | next_t = scan_pp (wildcard); |
2f1a1aea FCE |
644 | if (! next_t) |
645 | throw parse_error ("unexpected end-of-file"); | |
646 | ||
2f1a1aea FCE |
647 | last_t = next_t; |
648 | // advance by zeroing next_t | |
649 | next_t = 0; | |
650 | return last_t; | |
651 | } | |
652 | ||
653 | ||
654 | const token* | |
0c218afb | 655 | parser::peek (bool wildcard) |
2f1a1aea FCE |
656 | { |
657 | if (! next_t) | |
0c218afb | 658 | next_t = scan_pp (wildcard); |
2f1a1aea FCE |
659 | |
660 | // don't advance by zeroing next_t | |
661 | last_t = next_t; | |
662 | return next_t; | |
663 | } | |
664 | ||
665 | ||
d7f3e0c5 GH |
666 | static inline bool |
667 | tok_is(token const * t, token_type tt, string const & expected) | |
668 | { | |
669 | return t && t->type == tt && t->content == expected; | |
670 | } | |
671 | ||
672 | ||
dff50e09 | 673 | const token* |
d7f3e0c5 GH |
674 | parser::expect_known (token_type tt, string const & expected) |
675 | { | |
676 | const token *t = next(); | |
57b73400 | 677 | if (! (t && t->type == tt && t->content == expected)) |
d7f3e0c5 GH |
678 | throw parse_error ("expected '" + expected + "'"); |
679 | return t; | |
680 | } | |
681 | ||
682 | ||
dff50e09 | 683 | const token* |
d7f3e0c5 GH |
684 | parser::expect_unknown (token_type tt, string & target) |
685 | { | |
686 | const token *t = next(); | |
687 | if (!(t && t->type == tt)) | |
688 | throw parse_error ("expected " + tt2str(tt)); | |
689 | target = t->content; | |
690 | return t; | |
691 | } | |
692 | ||
693 | ||
dff50e09 | 694 | const token* |
493ee224 DS |
695 | parser::expect_unknown2 (token_type tt1, token_type tt2, string & target) |
696 | { | |
697 | const token *t = next(); | |
698 | if (!(t && (t->type == tt1 || t->type == tt2))) | |
699 | throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2)); | |
700 | target = t->content; | |
701 | return t; | |
702 | } | |
703 | ||
704 | ||
dff50e09 | 705 | const token* |
d7f3e0c5 GH |
706 | parser::expect_op (std::string const & expected) |
707 | { | |
708 | return expect_known (tok_operator, expected); | |
709 | } | |
710 | ||
711 | ||
dff50e09 | 712 | const token* |
d7f3e0c5 GH |
713 | parser::expect_kw (std::string const & expected) |
714 | { | |
f4fe2e93 | 715 | return expect_known (tok_keyword, expected); |
d7f3e0c5 GH |
716 | } |
717 | ||
dff50e09 | 718 | const token* |
e38723d2 | 719 | parser::expect_number (int64_t & value) |
57b73400 | 720 | { |
e38723d2 MH |
721 | bool neg = false; |
722 | const token *t = next(); | |
723 | if (t->type == tok_operator && t->content == "-") | |
724 | { | |
725 | neg = true; | |
726 | t = next (); | |
727 | } | |
728 | if (!(t && t->type == tok_number)) | |
729 | throw parse_error ("expected number"); | |
730 | ||
731 | const char* startp = t->content.c_str (); | |
732 | char* endp = (char*) startp; | |
733 | ||
734 | // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX | |
735 | // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX, | |
736 | // since the lexer only gives us positive digit strings, but we'll | |
737 | // limit it to LLONG_MIN when a '-' operator is fed into the literal. | |
738 | errno = 0; | |
739 | value = (int64_t) strtoull (startp, & endp, 0); | |
740 | if (errno == ERANGE || errno == EINVAL || *endp != '\0' | |
741 | || (neg && (unsigned long long) value > 9223372036854775808ULL) | |
742 | || (unsigned long long) value > 18446744073709551615ULL | |
743 | || value < -9223372036854775807LL-1) | |
dff50e09 FCE |
744 | throw parse_error ("number invalid or out of range"); |
745 | ||
e38723d2 MH |
746 | if (neg) |
747 | value = -value; | |
748 | ||
749 | return t; | |
57b73400 GH |
750 | } |
751 | ||
d7f3e0c5 | 752 | |
dff50e09 | 753 | const token* |
d7f3e0c5 GH |
754 | parser::expect_ident (std::string & target) |
755 | { | |
756 | return expect_unknown (tok_identifier, target); | |
757 | } | |
758 | ||
759 | ||
dff50e09 | 760 | const token* |
493ee224 DS |
761 | parser::expect_ident_or_keyword (std::string & target) |
762 | { | |
763 | return expect_unknown2 (tok_identifier, tok_keyword, target); | |
764 | } | |
765 | ||
766 | ||
dff50e09 | 767 | bool |
d7f3e0c5 GH |
768 | parser::peek_op (std::string const & op) |
769 | { | |
770 | return tok_is (peek(), tok_operator, op); | |
771 | } | |
772 | ||
773 | ||
dff50e09 | 774 | bool |
d7f3e0c5 GH |
775 | parser::peek_kw (std::string const & kw) |
776 | { | |
777 | return tok_is (peek(), tok_identifier, kw); | |
778 | } | |
779 | ||
780 | ||
781 | ||
66c7d4c1 | 782 | lexer::lexer (istream& input, const string& in, systemtap_session& s): |
c5be7511 | 783 | ate_comment(false), input_name (in), input_pointer (0), input_end (0), |
9300f661 JS |
784 | cursor_suspend_count(0), cursor_suspend_line (1), cursor_suspend_column (1), |
785 | cursor_line (1), cursor_column (1), | |
66c7d4c1 | 786 | session(s), current_file (0) |
eacb10ce | 787 | { |
66c7d4c1 | 788 | getline(input, input_contents, '\0'); |
2203b032 | 789 | |
66c7d4c1 JS |
790 | input_pointer = input_contents.data(); |
791 | input_end = input_contents.data() + input_contents.size(); | |
792 | ||
793 | if (keywords.empty()) | |
794 | { | |
795 | keywords.insert("probe"); | |
796 | keywords.insert("global"); | |
797 | keywords.insert("function"); | |
798 | keywords.insert("if"); | |
799 | keywords.insert("else"); | |
800 | keywords.insert("for"); | |
801 | keywords.insert("foreach"); | |
802 | keywords.insert("in"); | |
803 | keywords.insert("limit"); | |
804 | keywords.insert("return"); | |
805 | keywords.insert("delete"); | |
806 | keywords.insert("while"); | |
807 | keywords.insert("break"); | |
808 | keywords.insert("continue"); | |
809 | keywords.insert("next"); | |
810 | keywords.insert("string"); | |
811 | keywords.insert("long"); | |
f4fe2e93 FCE |
812 | keywords.insert("try"); |
813 | keywords.insert("catch"); | |
66c7d4c1 | 814 | } |
eacb10ce | 815 | } |
2f1a1aea | 816 | |
66c7d4c1 JS |
817 | set<string> lexer::keywords; |
818 | ||
1b1b4ceb RA |
819 | void |
820 | lexer::set_current_file (stapfile* f) | |
821 | { | |
822 | current_file = f; | |
2203b032 JS |
823 | if (f) |
824 | { | |
825 | f->file_contents = input_contents; | |
826 | f->name = input_name; | |
827 | } | |
1b1b4ceb | 828 | } |
bb2e3076 FCE |
829 | |
830 | int | |
831 | lexer::input_peek (unsigned n) | |
832 | { | |
66c7d4c1 JS |
833 | if (input_pointer + n >= input_end) |
834 | return -1; // EOF | |
835 | return (unsigned char)*(input_pointer + n); | |
bb2e3076 FCE |
836 | } |
837 | ||
838 | ||
dff50e09 | 839 | int |
2f1a1aea FCE |
840 | lexer::input_get () |
841 | { | |
66c7d4c1 | 842 | int c = input_peek(); |
bb2e3076 FCE |
843 | if (c < 0) return c; // EOF |
844 | ||
66c7d4c1 JS |
845 | ++input_pointer; |
846 | ||
3f99432c | 847 | if (cursor_suspend_count) |
9300f661 JS |
848 | { |
849 | // Track effect of input_put: preserve previous cursor/line_column | |
850 | // until all of its characters are consumed. | |
851 | if (--cursor_suspend_count == 0) | |
852 | { | |
853 | cursor_line = cursor_suspend_line; | |
854 | cursor_column = cursor_suspend_column; | |
855 | } | |
856 | } | |
3f99432c | 857 | else |
2f1a1aea | 858 | { |
3f99432c FCE |
859 | // update source cursor |
860 | if (c == '\n') | |
861 | { | |
862 | cursor_line ++; | |
863 | cursor_column = 1; | |
864 | } | |
865 | else | |
866 | cursor_column ++; | |
2f1a1aea | 867 | } |
2f1a1aea | 868 | |
eacb10ce | 869 | // clog << "[" << (char)c << "]"; |
2f1a1aea FCE |
870 | return c; |
871 | } | |
872 | ||
873 | ||
3f99432c | 874 | void |
9300f661 | 875 | lexer::input_put (const string& chars, const token* t) |
3f99432c | 876 | { |
66c7d4c1 JS |
877 | size_t pos = input_pointer - input_contents.data(); |
878 | // clog << "[put:" << chars << " @" << pos << "]"; | |
879 | input_contents.insert (pos, chars); | |
eacb10ce | 880 | cursor_suspend_count += chars.size(); |
9300f661 JS |
881 | cursor_suspend_line = cursor_line; |
882 | cursor_suspend_column = cursor_column; | |
883 | cursor_line = t->location.line; | |
884 | cursor_column = t->location.column; | |
66c7d4c1 JS |
885 | input_pointer = input_contents.data() + pos; |
886 | input_end = input_contents.data() + input_contents.size(); | |
3f99432c FCE |
887 | } |
888 | ||
889 | ||
// Scan and return the next token from the input, or 0 at end of input.
//
// wildcard: when true, '*' is accepted as an identifier character.
//
// Whitespace and comments (#, //, /* */) are skipped; ate_comment is set
// when a comment was skipped on the way to the returned token.
// $N/@N and $#/@# are replaced by pasting the corresponding command line
// argument text (or argument count) back into the input via input_put
// and rescanning.  Throws parse_error for nested argument substitution,
// an out-of-range argument index, an unterminated string, or an
// unterminated %{ ... %} block.
token*
lexer::scan (bool wildcard)
{
  ate_comment = false; // reset for each new token
  token* n = new token;
  n->location.file = current_file;

  // Every "goto skip" below restarts scanning at the current input
  // position, reusing the same token object and refreshing its location.
skip:
  // True if we are currently reading characters pasted by input_put.
  bool suspended = (cursor_suspend_count > 0);
  n->location.line = cursor_line;
  n->location.column = cursor_column;

  int c = input_get();
  // clog << "{" << (char)c << (char)c2 << "}";
  if (c < 0)
    {
      // End of input: no token to return.
      delete n;
      return 0;
    }

  if (isspace (c))
    goto skip;

  // One character of lookahead (not yet consumed).
  int c2 = input_peek ();

  // Paste command line arguments as character streams into
  // the beginning of a token.  $1..$999 go through as raw
  // characters; @1..@999 are quoted/escaped as strings.
  // $# and @# expand to the number of arguments, similarly
  // raw or quoted.
  if ((c == '$' || c == '@') && (c2 == '#'))
    {
      // n->content temporarily holds the "$#"/"@#" text so that it is
      // attached to the token handed to a parse_error below.
      n->content.push_back (c);
      n->content.push_back (c2);
      input_get(); // swallow '#'
      if (suspended)
        throw parse_error ("invalid nested substitution of command line arguments", n);
      size_t num_args = session.args.size ();
      input_put ((c == '$') ? lex_cast (num_args) : lex_cast_qstring (num_args), n);
      n->content.clear();
      goto skip;
    }
  else if ((c == '$' || c == '@') && (isdigit (c2)))
    {
      n->content.push_back (c);
      unsigned idx = 0;
      // Accumulate the decimal argument index; stop early once it
      // already exceeds the argument count.
      do
        {
          input_get ();
          idx = (idx * 10) + (c2 - '0');
          n->content.push_back (c2);
          c2 = input_peek ();
        } while (c2 > 0 &&
                 isdigit (c2) &&
                 idx <= session.args.size()); // prevent overflow
      if (suspended)
        throw parse_error ("invalid nested substitution of command line arguments", n);
      // Argument indices are 1-based.
      if (idx == 0 ||
          idx-1 >= session.args.size())
        throw parse_error ("command line argument index " + lex_cast(idx)
                           + " out of range [1-" + lex_cast(session.args.size()) + "]", n);
      const string& arg = session.args[idx-1];
      input_put ((c == '$') ? arg : lex_cast_qstring (arg), n);
      n->content.clear();
      goto skip;
    }

  // Identifier or keyword.
  else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
           (wildcard && c == '*'))
    {
      n->type = tok_identifier;
      n->content = (char) c;
      while (isalnum (c2) || c2 == '_' || c2 == '$' ||
             (wildcard && c2 == '*'))
        {
          input_get ();
          n->content.push_back (c2);
          c2 = input_peek ();
        }

      // Reclassify reserved words.
      if (keywords.count(n->content))
        n->type = tok_keyword;

      return n;
    }

  else if (isdigit (c)) // positive literal
    {
      n->type = tok_number;
      n->content = (char) c;

      while (isalnum (c2))
        {
          // NB: isalnum is very permissive.  We rely on strtol, called in
          // parser::parse_literal below, to confirm that the number string
          // is correctly formatted and in range.

          input_get ();
          n->content.push_back (c2);
          c2 = input_peek ();
        }
      return n;
    }

  // String literal: content excludes the surrounding quotes.
  else if (c == '\"')
    {
      n->type = tok_string;
      while (1)
        {
          c = input_get ();

          // Strings may not span lines or run into end of input.
          if (c < 0 || c == '\n')
            {
              throw parse_error("Could not find matching closing quote", n);
            }
          if (c == '\"') // closing double-quotes
            break;
          else if (c == '\\') // see also input_put
            {
              c = input_get ();
              switch (c)
                {
                case 'a':
                case 'b':
                case 't':
                case 'n':
                case 'v':
                case 'f':
                case 'r':
                // NOTE: "'0' ... '7'" is a case range, a GNU extension.
                case '0' ... '7': // NB: need only match the first digit
                case '\\':
                  // Pass these escapes through to the string value
                  // being parsed; it will be emitted into a C literal.

                  n->content.push_back('\\');

                  // fall through
                default:
                  // Any other escaped character is kept without its
                  // backslash.
                  n->content.push_back(c);
                  break;
                }
            }
          else
            n->content.push_back(c);
        }
      return n;
    }

  else if (ispunct (c))
    {
      // Second character of lookahead, for three-character operators.
      int c3 = input_peek (1);

      // NB: if we were to recognize negative numeric literals here,
      // we'd introduce another grammar ambiguity:
      // 1-1 would be parsed as tok_number(1) and tok_number(-1)
      // instead of tok_number(1) tok_operator('-') tok_number(1)

      if (c == '#') // shell comment
        {
          // Consume until the cursor moves to another line or input ends.
          unsigned this_line = cursor_line;
          do { c = input_get (); }
          while (c >= 0 && cursor_line == this_line);
          ate_comment = true;
          goto skip;
        }
      else if ((c == '/' && c2 == '/')) // C++ comment
        {
          unsigned this_line = cursor_line;
          do { c = input_get (); }
          while (c >= 0 && cursor_line == this_line);
          ate_comment = true;
          goto skip;
        }
      else if (c == '/' && c2 == '*') // C comment
        {
          (void) input_get (); // swallow '*' already in c2
          // Slide a two-character window (c, c2) looking for "*/".
          c = input_get ();
          c2 = input_get ();
          while (c2 >= 0)
            {
              if (c == '*' && c2 == '/')
                break;
              c = c2;
              c2 = input_get ();
            }
          // NB: an unterminated comment simply runs to end of input;
          // no error is raised here.
          ate_comment = true;
          goto skip;
        }
      else if (c == '%' && c2 == '{') // embedded code
        {
          n->type = tok_embedded;
          (void) input_get (); // swallow '{' already in c2
          // Same two-character window, collecting everything before "%}".
          c = input_get ();
          c2 = input_get ();
          while (c2 >= 0)
            {
              if (c == '%' && c2 == '}')
                return n;
              n->content += c;
              c = c2;
              c2 = input_get ();
            }

          throw parse_error ("Could not find matching '%}' to close embedded function block", n);
        }

      // We're committed to recognizing at least the first character
      // as an operator.
      n->type = tok_operator;
      n->content = c;

      // match all valid operators, in decreasing size order
      if ((c == '<' && c2 == '<' && c3 == '<') ||
          (c == '<' && c2 == '<' && c3 == '=') ||
          (c == '>' && c2 == '>' && c3 == '='))
        {
          n->content += c2;
          n->content += c3;
          input_get (); input_get (); // swallow other two characters
        }
      else if ((c == '=' && c2 == '=') ||
               (c == '!' && c2 == '=') ||
               (c == '<' && c2 == '=') ||
               (c == '>' && c2 == '=') ||
               (c == '+' && c2 == '=') ||
               (c == '-' && c2 == '=') ||
               (c == '*' && c2 == '=') ||
               (c == '/' && c2 == '=') ||
               (c == '%' && c2 == '=') ||
               (c == '&' && c2 == '=') ||
               (c == '^' && c2 == '=') ||
               (c == '|' && c2 == '=') ||
               (c == '.' && c2 == '=') ||
               (c == '&' && c2 == '&') ||
               (c == '|' && c2 == '|') ||
               (c == '+' && c2 == '+') ||
               (c == '-' && c2 == '-') ||
               (c == '-' && c2 == '>') ||
               (c == '<' && c2 == '<') ||
               (c == '>' && c2 == '>') ||
               // preprocessor tokens
               (c == '%' && c2 == '(') ||
               (c == '%' && c2 == '?') ||
               (c == '%' && c2 == ':') ||
               (c == '%' && c2 == ')'))
        {
          n->content += c2;
          input_get (); // swallow other character
        }

      return n;
    }

  else
    {
      // Anything else is handed back as a single-character junk token.
      n->type = tok_junk;
      n->content = (char) c;
      return n;
    }
}
1150 | ||
1151 | ||
1152 | // ------------------------------------------------------------------------ | |
1153 | ||
1154 | stapfile* | |
1155 | parser::parse () | |
1156 | { | |
1157 | stapfile* f = new stapfile; | |
1b1b4ceb | 1158 | input.set_current_file (f); |
56099f08 FCE |
1159 | |
1160 | bool empty = true; | |
1161 | ||
2f1a1aea FCE |
1162 | while (1) |
1163 | { | |
1164 | try | |
1165 | { | |
1166 | const token* t = peek (); | |
56099f08 | 1167 | if (! t) // nice clean EOF |
2f1a1aea FCE |
1168 | break; |
1169 | ||
56099f08 | 1170 | empty = false; |
6e213f58 DS |
1171 | if (t->type == tok_keyword && t->content == "probe") |
1172 | { | |
1173 | context = con_probe; | |
1174 | parse_probe (f->probes, f->aliases); | |
1175 | } | |
1176 | else if (t->type == tok_keyword && t->content == "global") | |
1177 | { | |
1178 | context = con_global; | |
4b5f3e45 | 1179 | parse_global (f->globals, f->probes); |
6e213f58 DS |
1180 | } |
1181 | else if (t->type == tok_keyword && t->content == "function") | |
1182 | { | |
1183 | context = con_function; | |
1184 | parse_functiondecl (f->functions); | |
1185 | } | |
54dfabe9 | 1186 | else if (t->type == tok_embedded) |
6e213f58 DS |
1187 | { |
1188 | context = con_embedded; | |
1189 | f->embeds.push_back (parse_embeddedcode ()); | |
1190 | } | |
2f1a1aea | 1191 | else |
6e213f58 DS |
1192 | { |
1193 | context = con_unknown; | |
1194 | throw parse_error ("expected 'probe', 'global', 'function', or '%{'"); | |
1195 | } | |
2f1a1aea FCE |
1196 | } |
1197 | catch (parse_error& pe) | |
1198 | { | |
1199 | print_error (pe); | |
cd7116b8 | 1200 | if (pe.skip_some) // for recovery |
dff50e09 | 1201 | try |
cd7116b8 FCE |
1202 | { |
1203 | // Quietly swallow all tokens until the next '}'. | |
1204 | while (1) | |
1205 | { | |
1206 | const token* t = peek (); | |
1207 | if (! t) | |
1208 | break; | |
1209 | next (); | |
1210 | if (t->type == tok_operator && t->content == "}") | |
1211 | break; | |
1212 | } | |
1213 | } | |
1214 | catch (parse_error& pe2) | |
1215 | { | |
1216 | // parse error during recovery ... ugh | |
1217 | print_error (pe2); | |
1218 | } | |
177a8ead | 1219 | } |
2f1a1aea FCE |
1220 | } |
1221 | ||
56099f08 FCE |
1222 | if (empty) |
1223 | { | |
1224 | cerr << "Input file '" << input_name << "' is empty or missing." << endl; | |
1225 | delete f; | |
2203b032 | 1226 | f = 0; |
56099f08 FCE |
1227 | } |
1228 | else if (num_errors > 0) | |
2f1a1aea FCE |
1229 | { |
1230 | cerr << num_errors << " parse error(s)." << endl; | |
1231 | delete f; | |
2203b032 | 1232 | f = 0; |
2f1a1aea | 1233 | } |
dff50e09 | 1234 | |
2203b032 | 1235 | input.set_current_file(0); |
2f1a1aea FCE |
1236 | return f; |
1237 | } | |
1238 | ||
1239 | ||
20c6c071 | 1240 | void |
54dfabe9 FCE |
1241 | parser::parse_probe (std::vector<probe *> & probe_ret, |
1242 | std::vector<probe_alias *> & alias_ret) | |
2f1a1aea | 1243 | { |
82919855 | 1244 | const token* t0 = next (); |
6e213f58 | 1245 | if (! (t0->type == tok_keyword && t0->content == "probe")) |
82919855 FCE |
1246 | throw parse_error ("expected 'probe'"); |
1247 | ||
20c6c071 GH |
1248 | vector<probe_point *> aliases; |
1249 | vector<probe_point *> locations; | |
1250 | ||
1251 | bool equals_ok = true; | |
82919855 | 1252 | |
97266278 LG |
1253 | int epilogue_alias = 0; |
1254 | ||
2f1a1aea FCE |
1255 | while (1) |
1256 | { | |
b4ceace2 | 1257 | probe_point * pp = parse_probe_point (); |
dff50e09 | 1258 | |
b4ceace2 | 1259 | const token* t = peek (); |
dff50e09 | 1260 | if (equals_ok && t |
b4ceace2 FCE |
1261 | && t->type == tok_operator && t->content == "=") |
1262 | { | |
1ad820e3 | 1263 | if (pp->optional || pp->sufficient) |
f1a0157a | 1264 | throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok); |
b4ceace2 FCE |
1265 | aliases.push_back(pp); |
1266 | next (); | |
1267 | continue; | |
1268 | } | |
dff50e09 | 1269 | else if (equals_ok && t |
97266278 LG |
1270 | && t->type == tok_operator && t->content == "+=") |
1271 | { | |
1ad820e3 | 1272 | if (pp->optional || pp->sufficient) |
f1a0157a | 1273 | throw parse_error ("probe point alias name cannot be optional nor sufficient", pp->components.front()->tok); |
97266278 LG |
1274 | aliases.push_back(pp); |
1275 | epilogue_alias = 1; | |
1276 | next (); | |
1277 | continue; | |
1278 | } | |
b4ceace2 FCE |
1279 | else if (t && t->type == tok_operator && t->content == ",") |
1280 | { | |
1281 | locations.push_back(pp); | |
1282 | equals_ok = false; | |
1283 | next (); | |
1284 | continue; | |
1285 | } | |
1286 | else if (t && t->type == tok_operator && t->content == "{") | |
1287 | { | |
1288 | locations.push_back(pp); | |
1289 | break; | |
1290 | } | |
2f1a1aea | 1291 | else |
9c0c0e46 | 1292 | throw parse_error ("expected probe point specifier"); |
2f1a1aea | 1293 | } |
20c6c071 | 1294 | |
20c6c071 GH |
1295 | if (aliases.empty()) |
1296 | { | |
54dfabe9 FCE |
1297 | probe* p = new probe; |
1298 | p->tok = t0; | |
1299 | p->locations = locations; | |
1300 | p->body = parse_stmt_block (); | |
37ebca01 | 1301 | p->privileged = privileged; |
54dfabe9 | 1302 | probe_ret.push_back (p); |
20c6c071 GH |
1303 | } |
1304 | else | |
1305 | { | |
54dfabe9 | 1306 | probe_alias* p = new probe_alias (aliases); |
97266278 LG |
1307 | if(epilogue_alias) |
1308 | p->epilogue_style = true; | |
1309 | else | |
1310 | p->epilogue_style = false; | |
54dfabe9 FCE |
1311 | p->tok = t0; |
1312 | p->locations = locations; | |
1313 | p->body = parse_stmt_block (); | |
37ebca01 | 1314 | p->privileged = privileged; |
54dfabe9 | 1315 | alias_ret.push_back (p); |
20c6c071 | 1316 | } |
54dfabe9 | 1317 | } |
20c6c071 | 1318 | |
54dfabe9 FCE |
1319 | |
1320 | embeddedcode* | |
1321 | parser::parse_embeddedcode () | |
1322 | { | |
1323 | embeddedcode* e = new embeddedcode; | |
1324 | const token* t = next (); | |
1325 | if (t->type != tok_embedded) | |
24cb178f FCE |
1326 | throw parse_error ("expected '%{'"); |
1327 | ||
1328 | if (! privileged) | |
cd7116b8 FCE |
1329 | throw parse_error ("embedded code in unprivileged script", |
1330 | false /* don't skip tokens for parse resumption */); | |
54dfabe9 FCE |
1331 | |
1332 | e->tok = t; | |
1333 | e->code = t->content; | |
1334 | return e; | |
2f1a1aea FCE |
1335 | } |
1336 | ||
1337 | ||
1338 | block* | |
56099f08 | 1339 | parser::parse_stmt_block () |
2f1a1aea FCE |
1340 | { |
1341 | block* pb = new block; | |
1342 | ||
56099f08 FCE |
1343 | const token* t = next (); |
1344 | if (! (t->type == tok_operator && t->content == "{")) | |
1345 | throw parse_error ("expected '{'"); | |
1346 | ||
1347 | pb->tok = t; | |
2b066ec1 | 1348 | |
2f1a1aea FCE |
1349 | while (1) |
1350 | { | |
1351 | try | |
1352 | { | |
2b066ec1 FCE |
1353 | t = peek (); |
1354 | if (t && t->type == tok_operator && t->content == "}") | |
1355 | { | |
1356 | next (); | |
1357 | break; | |
1358 | } | |
1359 | ||
2f1a1aea | 1360 | pb->statements.push_back (parse_statement ()); |
2f1a1aea FCE |
1361 | } |
1362 | catch (parse_error& pe) | |
1363 | { | |
1364 | print_error (pe); | |
54dfabe9 | 1365 | |
2f1a1aea FCE |
1366 | // Quietly swallow all tokens until the next ';' or '}'. |
1367 | while (1) | |
1368 | { | |
1369 | const token* t = peek (); | |
54dfabe9 | 1370 | if (! t) return 0; |
2f1a1aea | 1371 | next (); |
54dfabe9 FCE |
1372 | if (t->type == tok_operator |
1373 | && (t->content == "}" || t->content == ";")) | |
2f1a1aea FCE |
1374 | break; |
1375 | } | |
1376 | } | |
1377 | } | |
1378 | ||
1379 | return pb; | |
1380 | } | |
1381 | ||
1382 | ||
f4fe2e93 FCE |
1383 | try_block* |
1384 | parser::parse_try_block () | |
1385 | { | |
1386 | try_block* pb = new try_block; | |
1387 | ||
1388 | pb->tok = expect_kw ("try"); | |
1389 | pb->try_block = parse_stmt_block(); | |
1390 | expect_kw ("catch"); | |
1391 | ||
1392 | const token* t = peek (); | |
1393 | if (t->type == tok_operator && t->content == "(") | |
1394 | { | |
1395 | next (); // swallow the '(' | |
1396 | ||
1397 | t = next(); | |
1398 | if (! (t->type == tok_identifier)) | |
1399 | throw parse_error ("expected identifier"); | |
1400 | symbol* sym = new symbol; | |
1401 | sym->tok = t; | |
1402 | sym->name = t->content; | |
1403 | pb->catch_error_var = sym; | |
1404 | ||
1405 | expect_op (")"); | |
1406 | } | |
1407 | else | |
1408 | pb->catch_error_var = 0; | |
1409 | ||
1410 | pb->catch_block = parse_stmt_block(); | |
1411 | ||
1412 | return pb; | |
1413 | } | |
1414 | ||
1415 | ||
1416 | ||
2f1a1aea FCE |
1417 | statement* |
1418 | parser::parse_statement () | |
1419 | { | |
40b71c47 | 1420 | statement *ret; |
2f1a1aea FCE |
1421 | const token* t = peek (); |
1422 | if (t && t->type == tok_operator && t->content == ";") | |
f946b10f | 1423 | return new null_statement (next ()); |
dff50e09 | 1424 | else if (t && t->type == tok_operator && t->content == "{") |
40b71c47 | 1425 | return parse_stmt_block (); // Don't squash semicolons. |
f4fe2e93 FCE |
1426 | else if (t && t->type == tok_keyword && t->content == "try") |
1427 | return parse_try_block (); // Don't squash semicolons. | |
6e213f58 | 1428 | else if (t && t->type == tok_keyword && t->content == "if") |
40b71c47 | 1429 | return parse_if_statement (); // Don't squash semicolons. |
6e213f58 | 1430 | else if (t && t->type == tok_keyword && t->content == "for") |
40b71c47 | 1431 | return parse_for_loop (); // Don't squash semicolons. |
6e213f58 | 1432 | else if (t && t->type == tok_keyword && t->content == "foreach") |
40b71c47 MW |
1433 | return parse_foreach_loop (); // Don't squash semicolons. |
1434 | else if (t && t->type == tok_keyword && t->content == "while") | |
1435 | return parse_while_loop (); // Don't squash semicolons. | |
6e213f58 | 1436 | else if (t && t->type == tok_keyword && t->content == "return") |
40b71c47 | 1437 | ret = parse_return_statement (); |
6e213f58 | 1438 | else if (t && t->type == tok_keyword && t->content == "delete") |
40b71c47 | 1439 | ret = parse_delete_statement (); |
6e213f58 | 1440 | else if (t && t->type == tok_keyword && t->content == "break") |
40b71c47 | 1441 | ret = parse_break_statement (); |
6e213f58 | 1442 | else if (t && t->type == tok_keyword && t->content == "continue") |
40b71c47 | 1443 | ret = parse_continue_statement (); |
6e213f58 | 1444 | else if (t && t->type == tok_keyword && t->content == "next") |
40b71c47 | 1445 | ret = parse_next_statement (); |
2f1a1aea FCE |
1446 | else if (t && (t->type == tok_operator || // expressions are flexible |
1447 | t->type == tok_identifier || | |
1448 | t->type == tok_number || | |
7d902887 FCE |
1449 | t->type == tok_string || |
1450 | t->type == tok_embedded )) | |
40b71c47 | 1451 | ret = parse_expr_statement (); |
54dfabe9 | 1452 | // XXX: consider generally accepting tok_embedded here too |
2f1a1aea FCE |
1453 | else |
1454 | throw parse_error ("expected statement"); | |
40b71c47 MW |
1455 | |
1456 | // Squash "empty" trailing colons after any "non-block-like" statement. | |
1457 | t = peek (); | |
1458 | if (t && t->type == tok_operator && t->content == ";") | |
1459 | { | |
1460 | next (); // Silently eat trailing ; after statement | |
1461 | } | |
1462 | ||
1463 | return ret; | |
2f1a1aea FCE |
1464 | } |
1465 | ||
1466 | ||
56099f08 | 1467 | void |
78f6bba6 | 1468 | parser::parse_global (vector <vardecl*>& globals, vector<probe*>&) |
2f1a1aea | 1469 | { |
82919855 | 1470 | const token* t0 = next (); |
6e213f58 | 1471 | if (! (t0->type == tok_keyword && t0->content == "global")) |
82919855 FCE |
1472 | throw parse_error ("expected 'global'"); |
1473 | ||
56099f08 FCE |
1474 | while (1) |
1475 | { | |
1476 | const token* t = next (); | |
1477 | if (! (t->type == tok_identifier)) | |
1478 | throw parse_error ("expected identifier"); | |
1479 | ||
2b066ec1 FCE |
1480 | for (unsigned i=0; i<globals.size(); i++) |
1481 | if (globals[i]->name == t->content) | |
57b73400 | 1482 | throw parse_error ("duplicate global name"); |
dff50e09 | 1483 | |
24cb178f FCE |
1484 | vardecl* d = new vardecl; |
1485 | d->name = t->content; | |
1486 | d->tok = t; | |
1487 | globals.push_back (d); | |
56099f08 | 1488 | |
82919855 | 1489 | t = peek (); |
ef474d24 JS |
1490 | |
1491 | if (t && t->type == tok_operator && t->content == "[") // array size | |
1492 | { | |
1493 | int64_t size; | |
1494 | next (); | |
1495 | expect_number(size); | |
1496 | if (size <= 0 || size > 1000000) // arbitrary max | |
1497 | throw parse_error("array size out of range"); | |
1498 | d->maxsize = (int)size; | |
1499 | expect_known(tok_operator, "]"); | |
1500 | t = peek (); | |
1501 | } | |
1502 | ||
4b5f3e45 | 1503 | if (t && t->type == tok_operator && t->content == "=") // initialization |
ef474d24 JS |
1504 | { |
1505 | if (!d->compatible_arity(0)) | |
1506 | throw parse_error("only scalar globals can be initialized"); | |
58701b78 | 1507 | d->set_arity(0, t); |
ef474d24 JS |
1508 | next (); |
1509 | d->init = parse_literal (); | |
1510 | d->type = d->init->type; | |
1511 | t = peek (); | |
1512 | } | |
4b5f3e45 | 1513 | |
c3799d72 AM |
1514 | if (t && t->type == tok_operator && t->content == ";") // termination |
1515 | next(); | |
1516 | ||
4b5f3e45 | 1517 | if (t && t->type == tok_operator && t->content == ",") // next global |
82919855 FCE |
1518 | { |
1519 | next (); | |
1520 | continue; | |
1521 | } | |
56099f08 | 1522 | else |
82919855 | 1523 | break; |
56099f08 FCE |
1524 | } |
1525 | } | |
1526 | ||
1527 | ||
24cb178f FCE |
1528 | void |
1529 | parser::parse_functiondecl (std::vector<functiondecl*>& functions) | |
56099f08 | 1530 | { |
82919855 | 1531 | const token* t = next (); |
6e213f58 | 1532 | if (! (t->type == tok_keyword && t->content == "function")) |
82919855 FCE |
1533 | throw parse_error ("expected 'function'"); |
1534 | ||
56099f08 | 1535 | |
82919855 | 1536 | t = next (); |
6e213f58 DS |
1537 | if (! (t->type == tok_identifier) |
1538 | && ! (t->type == tok_keyword | |
1539 | && (t->content == "string" || t->content == "long"))) | |
56099f08 | 1540 | throw parse_error ("expected identifier"); |
24cb178f FCE |
1541 | |
1542 | for (unsigned i=0; i<functions.size(); i++) | |
1543 | if (functions[i]->name == t->content) | |
1544 | throw parse_error ("duplicate function name"); | |
1545 | ||
1546 | functiondecl *fd = new functiondecl (); | |
56099f08 FCE |
1547 | fd->name = t->content; |
1548 | fd->tok = t; | |
1549 | ||
1550 | t = next (); | |
6a505121 FCE |
1551 | if (t->type == tok_operator && t->content == ":") |
1552 | { | |
1553 | t = next (); | |
6e213f58 | 1554 | if (t->type == tok_keyword && t->content == "string") |
6a505121 | 1555 | fd->type = pe_string; |
6e213f58 | 1556 | else if (t->type == tok_keyword && t->content == "long") |
6a505121 FCE |
1557 | fd->type = pe_long; |
1558 | else throw parse_error ("expected 'string' or 'long'"); | |
1559 | ||
1560 | t = next (); | |
1561 | } | |
1562 | ||
56099f08 FCE |
1563 | if (! (t->type == tok_operator && t->content == "(")) |
1564 | throw parse_error ("expected '('"); | |
1565 | ||
1566 | while (1) | |
1567 | { | |
1568 | t = next (); | |
1569 | ||
1570 | // permit zero-argument fuctions | |
1571 | if (t->type == tok_operator && t->content == ")") | |
1572 | break; | |
1573 | else if (! (t->type == tok_identifier)) | |
1574 | throw parse_error ("expected identifier"); | |
1575 | vardecl* vd = new vardecl; | |
1576 | vd->name = t->content; | |
1577 | vd->tok = t; | |
1578 | fd->formal_args.push_back (vd); | |
1579 | ||
1580 | t = next (); | |
6a505121 FCE |
1581 | if (t->type == tok_operator && t->content == ":") |
1582 | { | |
1583 | t = next (); | |
6e213f58 | 1584 | if (t->type == tok_keyword && t->content == "string") |
6a505121 | 1585 | vd->type = pe_string; |
6e213f58 | 1586 | else if (t->type == tok_keyword && t->content == "long") |
6a505121 FCE |
1587 | vd->type = pe_long; |
1588 | else throw parse_error ("expected 'string' or 'long'"); | |
dff50e09 | 1589 | |
6a505121 FCE |
1590 | t = next (); |
1591 | } | |
56099f08 FCE |
1592 | if (t->type == tok_operator && t->content == ")") |
1593 | break; | |
1594 | if (t->type == tok_operator && t->content == ",") | |
1595 | continue; | |
1596 | else | |
1597 | throw parse_error ("expected ',' or ')'"); | |
1598 | } | |
1599 | ||
54dfabe9 FCE |
1600 | t = peek (); |
1601 | if (t && t->type == tok_embedded) | |
1602 | fd->body = parse_embeddedcode (); | |
1603 | else | |
1604 | fd->body = parse_stmt_block (); | |
24cb178f FCE |
1605 | |
1606 | functions.push_back (fd); | |
2f1a1aea FCE |
1607 | } |
1608 | ||
1609 | ||
9c0c0e46 FCE |
1610 | probe_point* |
1611 | parser::parse_probe_point () | |
2f1a1aea | 1612 | { |
9c0c0e46 | 1613 | probe_point* pl = new probe_point; |
2f1a1aea | 1614 | |
9c0c0e46 | 1615 | while (1) |
2f1a1aea | 1616 | { |
0c218afb | 1617 | const token* t = next (true); // wildcard scanning here |
6e213f58 DS |
1618 | if (! (t->type == tok_identifier |
1619 | // we must allow ".return" and ".function", which are keywords | |
0c218afb | 1620 | || t->type == tok_keyword)) |
b4ceace2 | 1621 | throw parse_error ("expected identifier or '*'"); |
9c0c0e46 | 1622 | |
9c0c0e46 FCE |
1623 | |
1624 | probe_point::component* c = new probe_point::component; | |
1625 | c->functor = t->content; | |
f1a0157a | 1626 | c->tok = t; |
9c0c0e46 | 1627 | pl->components.push_back (c); |
6e3347a9 | 1628 | // NB we may add c->arg soon |
9c0c0e46 FCE |
1629 | |
1630 | t = peek (); | |
a477f3f1 | 1631 | |
6e3347a9 | 1632 | // consume optional parameter |
9c0c0e46 FCE |
1633 | if (t && t->type == tok_operator && t->content == "(") |
1634 | { | |
1635 | next (); // consume "(" | |
1636 | c->arg = parse_literal (); | |
1637 | ||
1638 | t = next (); | |
1639 | if (! (t->type == tok_operator && t->content == ")")) | |
1640 | throw parse_error ("expected ')'"); | |
1641 | ||
1642 | t = peek (); | |
9c0c0e46 | 1643 | } |
9c0c0e46 FCE |
1644 | |
1645 | if (t && t->type == tok_operator && t->content == ".") | |
6e3347a9 FCE |
1646 | { |
1647 | next (); | |
1648 | continue; | |
1649 | } | |
1650 | ||
f1a0157a | 1651 | // We only fall through here at the end of a probe point (past |
6e3347a9 FCE |
1652 | // all the dotted/parametrized components). |
1653 | ||
d898100a FCE |
1654 | if (t && t->type == tok_operator && |
1655 | (t->content == "?" || t->content == "!")) | |
6e3347a9 FCE |
1656 | { |
1657 | pl->optional = true; | |
d898100a FCE |
1658 | if (t->content == "!") pl->sufficient = true; |
1659 | // NB: sufficient implies optional | |
6e3347a9 FCE |
1660 | next (); |
1661 | t = peek (); | |
1662 | // fall through | |
cbbe8080 MH |
1663 | } |
1664 | ||
1665 | if (t && t->type == tok_keyword && t->content == "if") | |
1666 | { | |
1667 | next (); | |
1668 | t = peek (); | |
75686668 | 1669 | if (t && ! (t->type == tok_operator && t->content == "(")) |
cbbe8080 MH |
1670 | throw parse_error ("expected '('"); |
1671 | next (); | |
1672 | ||
1673 | pl->condition = parse_expression (); | |
1674 | ||
1675 | t = peek (); | |
75686668 | 1676 | if (t && ! (t->type == tok_operator && t->content == ")")) |
cbbe8080 MH |
1677 | throw parse_error ("expected ')'"); |
1678 | next (); | |
1679 | t = peek (); | |
1680 | // fall through | |
6e3347a9 FCE |
1681 | } |
1682 | ||
dff50e09 | 1683 | if (t && t->type == tok_operator |
6e3347a9 FCE |
1684 | && (t->content == "{" || t->content == "," || |
1685 | t->content == "=" || t->content == "+=" )) | |
1686 | break; | |
dff50e09 | 1687 | |
d898100a | 1688 | throw parse_error ("expected one of '. , ( ? ! { = +='"); |
2f1a1aea FCE |
1689 | } |
1690 | ||
1691 | return pl; | |
1692 | } | |
1693 | ||
1694 | ||
1695 | literal* | |
1696 | parser::parse_literal () | |
1697 | { | |
1698 | const token* t = next (); | |
56099f08 | 1699 | literal* l; |
2f1a1aea | 1700 | if (t->type == tok_string) |
c5be7511 JS |
1701 | { |
1702 | literal_string *ls = new literal_string (t->content); | |
1703 | ||
1704 | // PR11208: check if the next token is also a string literal; auto-concatenate it | |
1705 | // This is complicated to the extent that we need to skip intermediate whitespace. | |
1706 | // XXX: but not comments | |
1707 | while (peek()->type == tok_string && !input.ate_comment) | |
1708 | ls->value.append(next()->content); // consume and append the token | |
1709 | ||
1710 | l = ls; | |
1711 | } | |
16e8f21f | 1712 | else |
9c0c0e46 | 1713 | { |
16e8f21f JS |
1714 | bool neg = false; |
1715 | if (t->type == tok_operator && t->content == "-") | |
1716 | { | |
1717 | neg = true; | |
1718 | t = next (); | |
1719 | } | |
1720 | ||
1721 | if (t->type == tok_number) | |
1722 | { | |
1723 | const char* startp = t->content.c_str (); | |
1724 | char* endp = (char*) startp; | |
1725 | ||
1726 | // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX | |
1727 | // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX, | |
79e6d33f JS |
1728 | // since the lexer only gives us positive digit strings, but we'll |
1729 | // limit it to LLONG_MIN when a '-' operator is fed into the literal. | |
16e8f21f JS |
1730 | errno = 0; |
1731 | long long value = (long long) strtoull (startp, & endp, 0); | |
16e8f21f | 1732 | if (errno == ERANGE || errno == EINVAL || *endp != '\0' |
79e6d33f | 1733 | || (neg && (unsigned long long) value > 9223372036854775808ULL) |
16e8f21f JS |
1734 | || (unsigned long long) value > 18446744073709551615ULL |
1735 | || value < -9223372036854775807LL-1) | |
dff50e09 | 1736 | throw parse_error ("number invalid or out of range"); |
16e8f21f | 1737 | |
79e6d33f JS |
1738 | if (neg) |
1739 | value = -value; | |
1740 | ||
16e8f21f JS |
1741 | l = new literal_number (value); |
1742 | } | |
1743 | else | |
1744 | throw parse_error ("expected literal string or number"); | |
9c0c0e46 | 1745 | } |
56099f08 FCE |
1746 | |
1747 | l->tok = t; | |
1748 | return l; | |
2f1a1aea FCE |
1749 | } |
1750 | ||
1751 | ||
1752 | if_statement* | |
1753 | parser::parse_if_statement () | |
1754 | { | |
1755 | const token* t = next (); | |
6e213f58 | 1756 | if (! (t->type == tok_keyword && t->content == "if")) |
56099f08 FCE |
1757 | throw parse_error ("expected 'if'"); |
1758 | if_statement* s = new if_statement; | |
1759 | s->tok = t; | |
1760 | ||
1761 | t = next (); | |
2f1a1aea FCE |
1762 | if (! (t->type == tok_operator && t->content == "(")) |
1763 | throw parse_error ("expected '('"); | |
1764 | ||
2f1a1aea FCE |
1765 | s->condition = parse_expression (); |
1766 | ||
1767 | t = next (); | |
1768 | if (! (t->type == tok_operator && t->content == ")")) | |
1769 | throw parse_error ("expected ')'"); | |
1770 | ||
1771 | s->thenblock = parse_statement (); | |
1772 | ||
1773 | t = peek (); | |
6e213f58 | 1774 | if (t && t->type == tok_keyword && t->content == "else") |
2f1a1aea FCE |
1775 | { |
1776 | next (); | |
1777 | s->elseblock = parse_statement (); | |
1778 | } | |
ed10c639 FCE |
1779 | else |
1780 | s->elseblock = 0; // in case not otherwise initialized | |
2f1a1aea FCE |
1781 | |
1782 | return s; | |
1783 | } | |
1784 | ||
1785 | ||
69c68955 FCE |
1786 | expr_statement* |
1787 | parser::parse_expr_statement () | |
1788 | { | |
1789 | expr_statement *es = new expr_statement; | |
1790 | const token* t = peek (); | |
1791 | es->tok = t; | |
1792 | es->value = parse_expression (); | |
1793 | return es; | |
1794 | } | |
1795 | ||
1796 | ||
56099f08 FCE |
1797 | return_statement* |
1798 | parser::parse_return_statement () | |
1799 | { | |
1800 | const token* t = next (); | |
6e213f58 | 1801 | if (! (t->type == tok_keyword && t->content == "return")) |
56099f08 | 1802 | throw parse_error ("expected 'return'"); |
6e213f58 DS |
1803 | if (context != con_function) |
1804 | throw parse_error ("found 'return' not in function context"); | |
56099f08 FCE |
1805 | return_statement* s = new return_statement; |
1806 | s->tok = t; | |
1807 | s->value = parse_expression (); | |
1808 | return s; | |
1809 | } | |
1810 | ||
1811 | ||
1812 | delete_statement* | |
1813 | parser::parse_delete_statement () | |
1814 | { | |
1815 | const token* t = next (); | |
6e213f58 | 1816 | if (! (t->type == tok_keyword && t->content == "delete")) |
56099f08 FCE |
1817 | throw parse_error ("expected 'delete'"); |
1818 | delete_statement* s = new delete_statement; | |
1819 | s->tok = t; | |
1820 | s->value = parse_expression (); | |
1821 | return s; | |
1822 | } | |
1823 | ||
1824 | ||
f3c26ea5 FCE |
1825 | next_statement* |
1826 | parser::parse_next_statement () | |
1827 | { | |
1828 | const token* t = next (); | |
6e213f58 | 1829 | if (! (t->type == tok_keyword && t->content == "next")) |
f3c26ea5 | 1830 | throw parse_error ("expected 'next'"); |
6e213f58 DS |
1831 | if (context != con_probe) |
1832 | throw parse_error ("found 'next' not in probe context"); | |
f3c26ea5 FCE |
1833 | next_statement* s = new next_statement; |
1834 | s->tok = t; | |
1835 | return s; | |
1836 | } | |
1837 | ||
1838 | ||
1839 | break_statement* | |
1840 | parser::parse_break_statement () | |
1841 | { | |
1842 | const token* t = next (); | |
6e213f58 | 1843 | if (! (t->type == tok_keyword && t->content == "break")) |
f3c26ea5 FCE |
1844 | throw parse_error ("expected 'break'"); |
1845 | break_statement* s = new break_statement; | |
1846 | s->tok = t; | |
1847 | return s; | |
1848 | } | |
1849 | ||
1850 | ||
1851 | continue_statement* | |
1852 | parser::parse_continue_statement () | |
1853 | { | |
1854 | const token* t = next (); | |
6e213f58 | 1855 | if (! (t->type == tok_keyword && t->content == "continue")) |
f3c26ea5 FCE |
1856 | throw parse_error ("expected 'continue'"); |
1857 | continue_statement* s = new continue_statement; | |
1858 | s->tok = t; | |
1859 | return s; | |
1860 | } | |
1861 | ||
1862 | ||
69c68955 FCE |
1863 | for_loop* |
1864 | parser::parse_for_loop () | |
1865 | { | |
f3c26ea5 | 1866 | const token* t = next (); |
6e213f58 | 1867 | if (! (t->type == tok_keyword && t->content == "for")) |
f3c26ea5 FCE |
1868 | throw parse_error ("expected 'for'"); |
1869 | for_loop* s = new for_loop; | |
1870 | s->tok = t; | |
1871 | ||
1872 | t = next (); | |
1873 | if (! (t->type == tok_operator && t->content == "(")) | |
1874 | throw parse_error ("expected '('"); | |
1875 | ||
1876 | // initializer + ";" | |
1877 | t = peek (); | |
1878 | if (t && t->type == tok_operator && t->content == ";") | |
1879 | { | |
cbfbbf69 FCE |
1880 | s->init = 0; |
1881 | next (); | |
f3c26ea5 FCE |
1882 | } |
1883 | else | |
1884 | { | |
1885 | s->init = parse_expr_statement (); | |
1886 | t = next (); | |
1887 | if (! (t->type == tok_operator && t->content == ";")) | |
1888 | throw parse_error ("expected ';'"); | |
1889 | } | |
1890 | ||
1891 | // condition + ";" | |
1892 | t = peek (); | |
1893 | if (t && t->type == tok_operator && t->content == ";") | |
1894 | { | |
1895 | literal_number* l = new literal_number(1); | |
1896 | s->cond = l; | |
1897 | s->cond->tok = next (); | |
1898 | } | |
1899 | else | |
1900 | { | |
1901 | s->cond = parse_expression (); | |
1902 | t = next (); | |
1903 | if (! (t->type == tok_operator && t->content == ";")) | |
1904 | throw parse_error ("expected ';'"); | |
1905 | } | |
dff50e09 | 1906 | |
f3c26ea5 FCE |
1907 | // increment + ")" |
1908 | t = peek (); | |
1909 | if (t && t->type == tok_operator && t->content == ")") | |
1910 | { | |
cbfbbf69 FCE |
1911 | s->incr = 0; |
1912 | next (); | |
f3c26ea5 FCE |
1913 | } |
1914 | else | |
1915 | { | |
1916 | s->incr = parse_expr_statement (); | |
1917 | t = next (); | |
1918 | if (! (t->type == tok_operator && t->content == ")")) | |
c958a431 | 1919 | throw parse_error ("expected ')'"); |
f3c26ea5 FCE |
1920 | } |
1921 | ||
1922 | // block | |
1923 | s->block = parse_statement (); | |
1924 | ||
1925 | return s; | |
1926 | } | |
1927 | ||
1928 | ||
1929 | for_loop* | |
1930 | parser::parse_while_loop () | |
1931 | { | |
1932 | const token* t = next (); | |
6e213f58 | 1933 | if (! (t->type == tok_keyword && t->content == "while")) |
f3c26ea5 FCE |
1934 | throw parse_error ("expected 'while'"); |
1935 | for_loop* s = new for_loop; | |
1936 | s->tok = t; | |
1937 | ||
1938 | t = next (); | |
1939 | if (! (t->type == tok_operator && t->content == "(")) | |
1940 | throw parse_error ("expected '('"); | |
1941 | ||
1942 | // dummy init and incr fields | |
cbfbbf69 FCE |
1943 | s->init = 0; |
1944 | s->incr = 0; | |
f3c26ea5 FCE |
1945 | |
1946 | // condition | |
1947 | s->cond = parse_expression (); | |
1948 | ||
f3c26ea5 FCE |
1949 | t = next (); |
1950 | if (! (t->type == tok_operator && t->content == ")")) | |
1951 | throw parse_error ("expected ')'"); | |
dff50e09 | 1952 | |
f3c26ea5 FCE |
1953 | // block |
1954 | s->block = parse_statement (); | |
1955 | ||
1956 | return s; | |
69c68955 FCE |
1957 | } |
1958 | ||
1959 | ||
1960 | foreach_loop* | |
1961 | parser::parse_foreach_loop () | |
1962 | { | |
1963 | const token* t = next (); | |
6e213f58 | 1964 | if (! (t->type == tok_keyword && t->content == "foreach")) |
69c68955 FCE |
1965 | throw parse_error ("expected 'foreach'"); |
1966 | foreach_loop* s = new foreach_loop; | |
1967 | s->tok = t; | |
93484556 | 1968 | s->sort_direction = 0; |
c261711d | 1969 | s->value = NULL; |
27f21e8c | 1970 | s->limit = NULL; |
69c68955 FCE |
1971 | |
1972 | t = next (); | |
1973 | if (! (t->type == tok_operator && t->content == "(")) | |
1974 | throw parse_error ("expected '('"); | |
1975 | ||
c261711d JS |
1976 | symbol* lookahead_sym = NULL; |
1977 | int lookahead_sort = 0; | |
1978 | ||
1979 | t = peek (); | |
1980 | if (t && t->type == tok_identifier) | |
1981 | { | |
1982 | next (); | |
1983 | lookahead_sym = new symbol; | |
1984 | lookahead_sym->tok = t; | |
1985 | lookahead_sym->name = t->content; | |
1986 | ||
1987 | t = peek (); | |
1988 | if (t && t->type == tok_operator && | |
1989 | (t->content == "+" || t->content == "-")) | |
1990 | { | |
1991 | next (); | |
1992 | lookahead_sort = (t->content == "+") ? 1 : -1; | |
1993 | } | |
1994 | ||
1995 | t = peek (); | |
1996 | if (t && t->type == tok_operator && t->content == "=") | |
1997 | { | |
1998 | next (); | |
1999 | s->value = lookahead_sym; | |
2000 | if (lookahead_sort) | |
2001 | { | |
2002 | s->sort_direction = lookahead_sort; | |
2003 | s->sort_column = 0; | |
2004 | } | |
2005 | lookahead_sym = NULL; | |
2006 | } | |
2007 | } | |
2008 | ||
69c68955 FCE |
2009 | // see also parse_array_in |
2010 | ||
2011 | bool parenthesized = false; | |
2012 | t = peek (); | |
c261711d | 2013 | if (!lookahead_sym && t && t->type == tok_operator && t->content == "[") |
69c68955 FCE |
2014 | { |
2015 | next (); | |
2016 | parenthesized = true; | |
2017 | } | |
2018 | ||
c261711d JS |
2019 | if (lookahead_sym) |
2020 | { | |
2021 | s->indexes.push_back (lookahead_sym); | |
2022 | if (lookahead_sort) | |
2023 | { | |
2024 | s->sort_direction = lookahead_sort; | |
2025 | s->sort_column = 1; | |
2026 | } | |
2027 | lookahead_sym = NULL; | |
2028 | } | |
2029 | else while (1) | |
69c68955 FCE |
2030 | { |
2031 | t = next (); | |
2032 | if (! (t->type == tok_identifier)) | |
2033 | throw parse_error ("expected identifier"); | |
2034 | symbol* sym = new symbol; | |
2035 | sym->tok = t; | |
2036 | sym->name = t->content; | |
2037 | s->indexes.push_back (sym); | |
2038 | ||
93484556 FCE |
2039 | t = peek (); |
2040 | if (t && t->type == tok_operator && | |
2041 | (t->content == "+" || t->content == "-")) | |
2042 | { | |
2043 | if (s->sort_direction) | |
2044 | throw parse_error ("multiple sort directives"); | |
2045 | s->sort_direction = (t->content == "+") ? 1 : -1; | |
2046 | s->sort_column = s->indexes.size(); | |
2047 | next(); | |
2048 | } | |
2049 | ||
69c68955 FCE |
2050 | if (parenthesized) |
2051 | { | |
93484556 | 2052 | t = peek (); |
69c68955 FCE |
2053 | if (t && t->type == tok_operator && t->content == ",") |
2054 | { | |
2055 | next (); | |
2056 | continue; | |
2057 | } | |
2058 | else if (t && t->type == tok_operator && t->content == "]") | |
2059 | { | |
2060 | next (); | |
2061 | break; | |
2062 | } | |
dff50e09 | 2063 | else |
69c68955 FCE |
2064 | throw parse_error ("expected ',' or ']'"); |
2065 | } | |
2066 | else | |
2067 | break; // expecting only one expression | |
2068 | } | |
2069 | ||
2070 | t = next (); | |
6e213f58 | 2071 | if (! (t->type == tok_keyword && t->content == "in")) |
69c68955 | 2072 | throw parse_error ("expected 'in'"); |
dff50e09 | 2073 | |
d02548c0 | 2074 | s->base = parse_indexable(); |
69c68955 | 2075 | |
93484556 FCE |
2076 | t = peek (); |
2077 | if (t && t->type == tok_operator && | |
2078 | (t->content == "+" || t->content == "-")) | |
2079 | { | |
2080 | if (s->sort_direction) | |
2081 | throw parse_error ("multiple sort directives"); | |
2082 | s->sort_direction = (t->content == "+") ? 1 : -1; | |
2083 | s->sort_column = 0; | |
2084 | next(); | |
2085 | } | |
2086 | ||
27f21e8c DS |
2087 | t = peek (); |
2088 | if (tok_is(t, tok_keyword, "limit")) | |
2089 | { | |
2090 | next (); // get past the "limit" | |
2091 | s->limit = parse_expression (); | |
2092 | } | |
2093 | ||
69c68955 FCE |
2094 | t = next (); |
2095 | if (! (t->type == tok_operator && t->content == ")")) | |
2096 | throw parse_error ("expected ')'"); | |
2097 | ||
2098 | s->block = parse_statement (); | |
2099 | return s; | |
2100 | } | |
2101 | ||
2102 | ||
2f1a1aea FCE |
2103 | expression* |
2104 | parser::parse_expression () | |
2105 | { | |
2106 | return parse_assignment (); | |
2107 | } | |
2108 | ||
2f1a1aea FCE |
2109 | |
2110 | expression* | |
2111 | parser::parse_assignment () | |
2112 | { | |
2113 | expression* op1 = parse_ternary (); | |
2114 | ||
2115 | const token* t = peek (); | |
82919855 | 2116 | // right-associative operators |
dff50e09 | 2117 | if (t && t->type == tok_operator |
2f1a1aea | 2118 | && (t->content == "=" || |
82919855 | 2119 | t->content == "<<<" || |
2f1a1aea | 2120 | t->content == "+=" || |
bb2e3076 FCE |
2121 | t->content == "-=" || |
2122 | t->content == "*=" || | |
2123 | t->content == "/=" || | |
2124 | t->content == "%=" || | |
2125 | t->content == "<<=" || | |
2126 | t->content == ">>=" || | |
2127 | t->content == "&=" || | |
2128 | t->content == "^=" || | |
2129 | t->content == "|=" || | |
d5d7c2cc | 2130 | t->content == ".=" || |
dff50e09 | 2131 | false)) |
2f1a1aea | 2132 | { |
bb2e3076 | 2133 | // NB: lvalueness is checked during elaboration / translation |
2f1a1aea | 2134 | assignment* e = new assignment; |
56099f08 | 2135 | e->left = op1; |
2f1a1aea | 2136 | e->op = t->content; |
56099f08 | 2137 | e->tok = t; |
2f1a1aea | 2138 | next (); |
82919855 | 2139 | e->right = parse_expression (); |
56099f08 | 2140 | op1 = e; |
2f1a1aea | 2141 | } |
56099f08 FCE |
2142 | |
2143 | return op1; | |
2f1a1aea FCE |
2144 | } |
2145 | ||
2146 | ||
2147 | expression* | |
2148 | parser::parse_ternary () | |
2149 | { | |
2150 | expression* op1 = parse_logical_or (); | |
2151 | ||
2152 | const token* t = peek (); | |
2153 | if (t && t->type == tok_operator && t->content == "?") | |
2154 | { | |
2f1a1aea | 2155 | ternary_expression* e = new ternary_expression; |
56099f08 | 2156 | e->tok = t; |
2f1a1aea | 2157 | e->cond = op1; |
56099f08 FCE |
2158 | next (); |
2159 | e->truevalue = parse_expression (); // XXX | |
2f1a1aea FCE |
2160 | |
2161 | t = next (); | |
2162 | if (! (t->type == tok_operator && t->content == ":")) | |
2163 | throw parse_error ("expected ':'"); | |
2164 | ||
56099f08 | 2165 | e->falsevalue = parse_expression (); // XXX |
2f1a1aea FCE |
2166 | return e; |
2167 | } | |
2168 | else | |
2169 | return op1; | |
2170 | } | |
2171 | ||
2172 | ||
2173 | expression* | |
2174 | parser::parse_logical_or () | |
2175 | { | |
2176 | expression* op1 = parse_logical_and (); | |
dff50e09 | 2177 | |
2f1a1aea | 2178 | const token* t = peek (); |
56099f08 | 2179 | while (t && t->type == tok_operator && t->content == "||") |
2f1a1aea | 2180 | { |
2f1a1aea | 2181 | logical_or_expr* e = new logical_or_expr; |
56099f08 FCE |
2182 | e->tok = t; |
2183 | e->op = t->content; | |
2f1a1aea | 2184 | e->left = op1; |
56099f08 FCE |
2185 | next (); |
2186 | e->right = parse_logical_and (); | |
2187 | op1 = e; | |
2188 | t = peek (); | |
2f1a1aea | 2189 | } |
56099f08 FCE |
2190 | |
2191 | return op1; | |
2f1a1aea FCE |
2192 | } |
2193 | ||
2194 | ||
2195 | expression* | |
2196 | parser::parse_logical_and () | |
2197 | { | |
bb2e3076 | 2198 | expression* op1 = parse_boolean_or (); |
2f1a1aea FCE |
2199 | |
2200 | const token* t = peek (); | |
56099f08 | 2201 | while (t && t->type == tok_operator && t->content == "&&") |
2f1a1aea | 2202 | { |
2f1a1aea FCE |
2203 | logical_and_expr *e = new logical_and_expr; |
2204 | e->left = op1; | |
56099f08 FCE |
2205 | e->op = t->content; |
2206 | e->tok = t; | |
2207 | next (); | |
bb2e3076 FCE |
2208 | e->right = parse_boolean_or (); |
2209 | op1 = e; | |
2210 | t = peek (); | |
2211 | } | |
2212 | ||
2213 | return op1; | |
2214 | } | |
2215 | ||
2216 | ||
2217 | expression* | |
2218 | parser::parse_boolean_or () | |
2219 | { | |
2220 | expression* op1 = parse_boolean_xor (); | |
2221 | ||
2222 | const token* t = peek (); | |
2223 | while (t && t->type == tok_operator && t->content == "|") | |
2224 | { | |
2225 | binary_expression* e = new binary_expression; | |
2226 | e->left = op1; | |
2227 | e->op = t->content; | |
2228 | e->tok = t; | |
2229 | next (); | |
2230 | e->right = parse_boolean_xor (); | |
2231 | op1 = e; | |
2232 | t = peek (); | |
2233 | } | |
2234 | ||
2235 | return op1; | |
2236 | } | |
2237 | ||
2238 | ||
2239 | expression* | |
2240 | parser::parse_boolean_xor () | |
2241 | { | |
2242 | expression* op1 = parse_boolean_and (); | |
2243 | ||
2244 | const token* t = peek (); | |
2245 | while (t && t->type == tok_operator && t->content == "^") | |
2246 | { | |
2247 | binary_expression* e = new binary_expression; | |
2248 | e->left = op1; | |
2249 | e->op = t->content; | |
2250 | e->tok = t; | |
2251 | next (); | |
2252 | e->right = parse_boolean_and (); | |
2253 | op1 = e; | |
2254 | t = peek (); | |
2255 | } | |
2256 | ||
2257 | return op1; | |
2258 | } | |
2259 | ||
2260 | ||
2261 | expression* | |
2262 | parser::parse_boolean_and () | |
2263 | { | |
2264 | expression* op1 = parse_array_in (); | |
2265 | ||
2266 | const token* t = peek (); | |
2267 | while (t && t->type == tok_operator && t->content == "&") | |
2268 | { | |
2269 | binary_expression* e = new binary_expression; | |
2270 | e->left = op1; | |
2271 | e->op = t->content; | |
2272 | e->tok = t; | |
2273 | next (); | |
56099f08 FCE |
2274 | e->right = parse_array_in (); |
2275 | op1 = e; | |
2276 | t = peek (); | |
2f1a1aea | 2277 | } |
56099f08 FCE |
2278 | |
2279 | return op1; | |
2f1a1aea FCE |
2280 | } |
2281 | ||
2282 | ||
2283 | expression* | |
2284 | parser::parse_array_in () | |
2285 | { | |
ce10591c | 2286 | // This is a very tricky case. All these are legit expressions: |
69c68955 | 2287 | // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b" |
ce10591c FCE |
2288 | vector<expression*> indexes; |
2289 | bool parenthesized = false; | |
2f1a1aea FCE |
2290 | |
2291 | const token* t = peek (); | |
69c68955 | 2292 | if (t && t->type == tok_operator && t->content == "[") |
ce10591c FCE |
2293 | { |
2294 | next (); | |
2295 | parenthesized = true; | |
2296 | } | |
2297 | ||
2298 | while (1) | |
2299 | { | |
2300 | expression* op1 = parse_comparison (); | |
2301 | indexes.push_back (op1); | |
2302 | ||
2303 | if (parenthesized) | |
2304 | { | |
2305 | const token* t = peek (); | |
2306 | if (t && t->type == tok_operator && t->content == ",") | |
2307 | { | |
2308 | next (); | |
2309 | continue; | |
2310 | } | |
69c68955 | 2311 | else if (t && t->type == tok_operator && t->content == "]") |
ce10591c FCE |
2312 | { |
2313 | next (); | |
2314 | break; | |
2315 | } | |
dff50e09 | 2316 | else |
69c68955 | 2317 | throw parse_error ("expected ',' or ']'"); |
ce10591c FCE |
2318 | } |
2319 | else | |
2320 | break; // expecting only one expression | |
2321 | } | |
2322 | ||
2323 | t = peek (); | |
6e213f58 | 2324 | if (t && t->type == tok_keyword && t->content == "in") |
2f1a1aea | 2325 | { |
2f1a1aea | 2326 | array_in *e = new array_in; |
56099f08 | 2327 | e->tok = t; |
ce10591c FCE |
2328 | next (); // swallow "in" |
2329 | ||
2330 | arrayindex* a = new arrayindex; | |
2331 | a->indexes = indexes; | |
d02548c0 GH |
2332 | a->base = parse_indexable(); |
2333 | a->tok = a->base->get_tok(); | |
ce10591c | 2334 | e->operand = a; |
2f1a1aea FCE |
2335 | return e; |
2336 | } | |
ce10591c FCE |
2337 | else if (indexes.size() == 1) // no "in" - need one expression only |
2338 | return indexes[0]; | |
2f1a1aea | 2339 | else |
ce10591c | 2340 | throw parse_error ("unexpected comma-separated expression list"); |
2f1a1aea FCE |
2341 | } |
2342 | ||
2343 | ||
2344 | expression* | |
2345 | parser::parse_comparison () | |
2346 | { | |
bb2e3076 | 2347 | expression* op1 = parse_shift (); |
2f1a1aea FCE |
2348 | |
2349 | const token* t = peek (); | |
dff50e09 | 2350 | while (t && t->type == tok_operator |
553d27a5 FCE |
2351 | && (t->content == ">" || |
2352 | t->content == "<" || | |
2353 | t->content == "==" || | |
2354 | t->content == "!=" || | |
2355 | t->content == "<=" || | |
bb2e3076 | 2356 | t->content == ">=")) |
2f1a1aea FCE |
2357 | { |
2358 | comparison* e = new comparison; | |
2359 | e->left = op1; | |
2360 | e->op = t->content; | |
56099f08 | 2361 | e->tok = t; |
2f1a1aea | 2362 | next (); |
bb2e3076 FCE |
2363 | e->right = parse_shift (); |
2364 | op1 = e; | |
2365 | t = peek (); | |
2366 | } | |
2367 | ||
2368 | return op1; | |
2369 | } | |
2370 | ||
2371 | ||
2372 | expression* | |
2373 | parser::parse_shift () | |
2374 | { | |
2375 | expression* op1 = parse_concatenation (); | |
2376 | ||
2377 | const token* t = peek (); | |
dff50e09 | 2378 | while (t && t->type == tok_operator && |
bb2e3076 FCE |
2379 | (t->content == "<<" || t->content == ">>")) |
2380 | { | |
2381 | binary_expression* e = new binary_expression; | |
2382 | e->left = op1; | |
2383 | e->op = t->content; | |
2384 | e->tok = t; | |
2385 | next (); | |
56099f08 FCE |
2386 | e->right = parse_concatenation (); |
2387 | op1 = e; | |
2388 | t = peek (); | |
2f1a1aea | 2389 | } |
56099f08 FCE |
2390 | |
2391 | return op1; | |
2f1a1aea FCE |
2392 | } |
2393 | ||
2394 | ||
2395 | expression* | |
2396 | parser::parse_concatenation () | |
2397 | { | |
2398 | expression* op1 = parse_additive (); | |
2399 | ||
2400 | const token* t = peek (); | |
2401 | // XXX: the actual awk string-concatenation operator is *whitespace*. | |
2402 | // I don't know how to easily to model that here. | |
56099f08 | 2403 | while (t && t->type == tok_operator && t->content == ".") |
2f1a1aea FCE |
2404 | { |
2405 | concatenation* e = new concatenation; | |
2406 | e->left = op1; | |
2407 | e->op = t->content; | |
56099f08 | 2408 | e->tok = t; |
2f1a1aea | 2409 | next (); |
56099f08 FCE |
2410 | e->right = parse_additive (); |
2411 | op1 = e; | |
2412 | t = peek (); | |
2f1a1aea | 2413 | } |
56099f08 FCE |
2414 | |
2415 | return op1; | |
2f1a1aea FCE |
2416 | } |
2417 | ||
2418 | ||
2419 | expression* | |
2420 | parser::parse_additive () | |
2421 | { | |
2422 | expression* op1 = parse_multiplicative (); | |
2423 | ||
2424 | const token* t = peek (); | |
dff50e09 | 2425 | while (t && t->type == tok_operator |
2f1a1aea FCE |
2426 | && (t->content == "+" || t->content == "-")) |
2427 | { | |
2428 | binary_expression* e = new binary_expression; | |
2429 | e->op = t->content; | |
2430 | e->left = op1; | |
56099f08 | 2431 | e->tok = t; |
2f1a1aea | 2432 | next (); |
56099f08 FCE |
2433 | e->right = parse_multiplicative (); |
2434 | op1 = e; | |
2435 | t = peek (); | |
2f1a1aea | 2436 | } |
56099f08 FCE |
2437 | |
2438 | return op1; | |
2f1a1aea FCE |
2439 | } |
2440 | ||
2441 | ||
2442 | expression* | |
2443 | parser::parse_multiplicative () | |
2444 | { | |
2445 | expression* op1 = parse_unary (); | |
2446 | ||
2447 | const token* t = peek (); | |
dff50e09 | 2448 | while (t && t->type == tok_operator |
2f1a1aea FCE |
2449 | && (t->content == "*" || t->content == "/" || t->content == "%")) |
2450 | { | |
2451 | binary_expression* e = new binary_expression; | |
2452 | e->op = t->content; | |
2453 | e->left = op1; | |
56099f08 | 2454 | e->tok = t; |
2f1a1aea | 2455 | next (); |
56099f08 FCE |
2456 | e->right = parse_unary (); |
2457 | op1 = e; | |
2458 | t = peek (); | |
2f1a1aea | 2459 | } |
56099f08 FCE |
2460 | |
2461 | return op1; | |
2f1a1aea FCE |
2462 | } |
2463 | ||
2464 | ||
2465 | expression* | |
2466 | parser::parse_unary () | |
2467 | { | |
2468 | const token* t = peek (); | |
dff50e09 FCE |
2469 | if (t && t->type == tok_operator |
2470 | && (t->content == "+" || | |
2471 | t->content == "-" || | |
bb2e3076 FCE |
2472 | t->content == "!" || |
2473 | t->content == "~" || | |
2474 | false)) | |
2f1a1aea FCE |
2475 | { |
2476 | unary_expression* e = new unary_expression; | |
2477 | e->op = t->content; | |
56099f08 | 2478 | e->tok = t; |
2f1a1aea | 2479 | next (); |
1cb79a72 | 2480 | e->operand = parse_unary (); |
2f1a1aea FCE |
2481 | return e; |
2482 | } | |
2483 | else | |
bb2e3076 | 2484 | return parse_crement (); |
2f1a1aea FCE |
2485 | } |
2486 | ||
2487 | ||
2488 | expression* | |
2489 | parser::parse_crement () // as in "increment" / "decrement" | |
2490 | { | |
cbfbbf69 FCE |
2491 | // NB: Ideally, we'd parse only a symbol as an operand to the |
2492 | // *crement operators, instead of a general expression value. We'd | |
2493 | // need more complex lookahead code to tell apart the postfix cases. | |
2494 | // So we just punt, and leave it to pass-3 to signal errors on | |
2495 | // cases like "4++". | |
2496 | ||
2f1a1aea | 2497 | const token* t = peek (); |
dff50e09 | 2498 | if (t && t->type == tok_operator |
2f1a1aea FCE |
2499 | && (t->content == "++" || t->content == "--")) |
2500 | { | |
2501 | pre_crement* e = new pre_crement; | |
2502 | e->op = t->content; | |
56099f08 | 2503 | e->tok = t; |
2f1a1aea FCE |
2504 | next (); |
2505 | e->operand = parse_value (); | |
2506 | return e; | |
2507 | } | |
2508 | ||
2509 | // post-crement or non-crement | |
2510 | expression *op1 = parse_value (); | |
dff50e09 | 2511 | |
2f1a1aea | 2512 | t = peek (); |
dff50e09 | 2513 | if (t && t->type == tok_operator |
2f1a1aea FCE |
2514 | && (t->content == "++" || t->content == "--")) |
2515 | { | |
2516 | post_crement* e = new post_crement; | |
2517 | e->op = t->content; | |
56099f08 | 2518 | e->tok = t; |
2f1a1aea FCE |
2519 | next (); |
2520 | e->operand = op1; | |
2521 | return e; | |
2522 | } | |
2523 | else | |
2524 | return op1; | |
2525 | } | |
2526 | ||
2527 | ||
2528 | expression* | |
2529 | parser::parse_value () | |
2530 | { | |
2531 | const token* t = peek (); | |
2532 | if (! t) | |
2533 | throw parse_error ("expected value"); | |
2534 | ||
7d902887 FCE |
2535 | if (t->type == tok_embedded) |
2536 | { | |
2537 | next (); | |
2538 | if (! privileged) | |
2539 | throw parse_error ("embedded expression code in unprivileged script", false); | |
2540 | ||
2541 | embedded_expr *e = new embedded_expr; | |
2542 | e->tok = t; | |
2543 | e->code = t->content; | |
2544 | return e; | |
2545 | } | |
2546 | ||
2f1a1aea FCE |
2547 | if (t->type == tok_operator && t->content == "(") |
2548 | { | |
2549 | next (); | |
2550 | expression* e = parse_expression (); | |
2551 | t = next (); | |
2552 | if (! (t->type == tok_operator && t->content == ")")) | |
2553 | throw parse_error ("expected ')'"); | |
2554 | return e; | |
2555 | } | |
03c75a4a JS |
2556 | else if (t->type == tok_operator && t->content == "&") |
2557 | { | |
2558 | next (); | |
d48afc20 | 2559 | return parse_target_symbol (t); |
03c75a4a | 2560 | } |
2f1a1aea FCE |
2561 | else if (t->type == tok_identifier) |
2562 | return parse_symbol (); | |
2563 | else | |
2564 | return parse_literal (); | |
2565 | } | |
2566 | ||
2567 | ||
d02548c0 GH |
2568 | const token * |
2569 | parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name) | |
2570 | { | |
2571 | hop = NULL; | |
2572 | const token* t = expect_ident (name); | |
2573 | if (name == "@hist_linear" || name == "@hist_log") | |
2574 | { | |
2575 | hop = new hist_op; | |
2576 | if (name == "@hist_linear") | |
2577 | hop->htype = hist_linear; | |
2578 | else if (name == "@hist_log") | |
2579 | hop->htype = hist_log; | |
2580 | hop->tok = t; | |
2581 | expect_op("("); | |
2582 | hop->stat = parse_expression (); | |
2583 | int64_t tnum; | |
2584 | if (hop->htype == hist_linear) | |
2585 | { | |
2586 | for (size_t i = 0; i < 3; ++i) | |
2587 | { | |
2588 | expect_op (","); | |
2589 | expect_number (tnum); | |
2590 | hop->params.push_back (tnum); | |
2591 | } | |
2592 | } | |
d02548c0 GH |
2593 | expect_op(")"); |
2594 | } | |
2595 | return t; | |
2596 | } | |
2597 | ||
2598 | ||
2599 | indexable* | |
2600 | parser::parse_indexable () | |
2601 | { | |
2602 | hist_op *hop = NULL; | |
2603 | string name; | |
2604 | const token *tok = parse_hist_op_or_bare_name(hop, name); | |
2605 | if (hop) | |
2606 | return hop; | |
2607 | else | |
2608 | { | |
2609 | symbol* sym = new symbol; | |
2610 | sym->name = name; | |
2611 | sym->tok = tok; | |
2612 | return sym; | |
2613 | } | |
2614 | } | |
2615 | ||
2616 | ||
2617 | // var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat) | |
30263a73 | 2618 | expression* parser::parse_symbol () |
2f1a1aea | 2619 | { |
d02548c0 GH |
2620 | hist_op *hop = NULL; |
2621 | symbol *sym = NULL; | |
d7f3e0c5 | 2622 | string name; |
d02548c0 GH |
2623 | const token *t = parse_hist_op_or_bare_name(hop, name); |
2624 | ||
2625 | if (!hop) | |
0fefb486 | 2626 | { |
dff50e09 | 2627 | // If we didn't get a hist_op, then we did get an identifier. We can |
d02548c0 GH |
2628 | // now scrutinize this identifier for the various magic forms of identifier |
2629 | // (printf, @stat_op, and $var...) | |
2630 | ||
30263a73 FCE |
2631 | if (name == "@cast" || (name.size()>0 && name[0] == '$')) |
2632 | return parse_target_symbol (t); | |
9b5af295 | 2633 | |
db135493 FCE |
2634 | // NB: PR11343: @defined() is not incompatible with earlier versions |
2635 | // of stap, so no need to check session.compatible for 1.2 | |
30263a73 FCE |
2636 | if (name == "@defined") |
2637 | return parse_defined_op (t); | |
2638 | ||
9b5af295 | 2639 | else if (name.size() > 0 && name[0] == '@') |
d7f3e0c5 | 2640 | { |
d02548c0 GH |
2641 | stat_op *sop = new stat_op; |
2642 | if (name == "@avg") | |
2643 | sop->ctype = sc_average; | |
2644 | else if (name == "@count") | |
2645 | sop->ctype = sc_count; | |
2646 | else if (name == "@sum") | |
2647 | sop->ctype = sc_sum; | |
2648 | else if (name == "@min") | |
2649 | sop->ctype = sc_min; | |
2650 | else if (name == "@max") | |
2651 | sop->ctype = sc_max; | |
2652 | else | |
2653 | throw parse_error("unknown statistic operator " + name); | |
2654 | expect_op("("); | |
2655 | sop->tok = t; | |
2656 | sop->stat = parse_expression (); | |
2657 | expect_op(")"); | |
2658 | return sop; | |
2659 | } | |
dff50e09 | 2660 | |
d5e178c1 | 2661 | else if (print_format *fmt = print_format::create(t)) |
d02548c0 | 2662 | { |
d02548c0 | 2663 | expect_op("("); |
b15c465c PP |
2664 | if ((name == "print" || name == "println" || |
2665 | name == "sprint" || name == "sprintln") && | |
3cb17058 | 2666 | (peek_kw("@hist_linear") || peek_kw("@hist_log"))) |
a4636912 GH |
2667 | { |
2668 | // We have a special case where we recognize | |
2669 | // print(@hist_foo(bar)) as a magic print-the-histogram | |
2670 | // construct. This is sort of gross but it avoids | |
2671 | // promoting histogram references to typeful | |
2672 | // expressions. | |
dff50e09 | 2673 | |
1bbeef03 GH |
2674 | hop = NULL; |
2675 | t = parse_hist_op_or_bare_name(hop, name); | |
2676 | assert(hop); | |
dff50e09 | 2677 | |
1bbeef03 GH |
2678 | // It is, sadly, possible that even while parsing a |
2679 | // hist_op, we *mis-guessed* and the user wishes to | |
2680 | // print(@hist_op(foo)[bucket]), a scalar. In that case | |
2681 | // we must parse the arrayindex and print an expression. | |
839325a1 JS |
2682 | // |
2683 | // XXX: This still fails if the arrayindex is part of a | |
2684 | // larger expression. To really handle everything, we'd | |
2685 | // need to push back all the hist tokens start over. | |
dff50e09 | 2686 | |
1bbeef03 GH |
2687 | if (!peek_op ("[")) |
2688 | fmt->hist = hop; | |
2689 | else | |
2690 | { | |
2691 | // This is simplified version of the | |
2692 | // multi-array-index parser below, because we can | |
2693 | // only ever have one index on a histogram anyways. | |
2694 | expect_op("["); | |
2695 | struct arrayindex* ai = new arrayindex; | |
2696 | ai->tok = t; | |
2697 | ai->base = hop; | |
2698 | ai->indexes.push_back (parse_expression ()); | |
2699 | expect_op("]"); | |
2700 | fmt->args.push_back(ai); | |
839325a1 JS |
2701 | |
2702 | // Consume any subsequent arguments. | |
2703 | while (!peek_op (")")) | |
2704 | { | |
2705 | expect_op(","); | |
2706 | expression *e = parse_expression (); | |
2707 | fmt->args.push_back(e); | |
2708 | } | |
1bbeef03 | 2709 | } |
a4636912 | 2710 | } |
d7f3e0c5 | 2711 | else |
d02548c0 | 2712 | { |
3cb17058 JS |
2713 | int min_args = 0; |
2714 | if (fmt->print_with_format) | |
2715 | { | |
2716 | // Consume and convert a format string. Agreement between the | |
2717 | // format string and the arguments is postponed to the | |
2718 | // typechecking phase. | |
2719 | string tmp; | |
2720 | expect_unknown (tok_string, tmp); | |
2721 | fmt->raw_components = tmp; | |
2722 | fmt->components = print_format::string_to_components (tmp); | |
2723 | } | |
2724 | else if (fmt->print_with_delim) | |
2725 | { | |
2726 | // Consume a delimiter to separate arguments. | |
2727 | fmt->delimiter.clear(); | |
2728 | fmt->delimiter.type = print_format::conv_literal; | |
2729 | expect_unknown (tok_string, fmt->delimiter.literal_string); | |
2730 | min_args = 2; | |
2731 | } | |
2732 | else | |
2733 | { | |
2734 | // If we are not printing with a format string, we must have | |
2735 | // at least one argument (of any type). | |
2736 | expression *e = parse_expression (); | |
2737 | fmt->args.push_back(e); | |
2738 | } | |
2739 | ||
2740 | // Consume any subsequent arguments. | |
2741 | while (min_args || !peek_op (")")) | |
2742 | { | |
2743 | expect_op(","); | |
2744 | expression *e = parse_expression (); | |
2745 | fmt->args.push_back(e); | |
2746 | if (min_args) | |
2747 | --min_args; | |
2748 | } | |
d02548c0 GH |
2749 | } |
2750 | expect_op(")"); | |
2751 | return fmt; | |
2752 | } | |
dff50e09 | 2753 | |
d02548c0 GH |
2754 | else if (peek_op ("(")) // function call |
2755 | { | |
2756 | next (); | |
2757 | struct functioncall* f = new functioncall; | |
2758 | f->tok = t; | |
2759 | f->function = name; | |
2760 | // Allow empty actual parameter list | |
2761 | if (peek_op (")")) | |
2762 | { | |
2763 | next (); | |
2764 | return f; | |
2765 | } | |
2766 | while (1) | |
2767 | { | |
2768 | f->args.push_back (parse_expression ()); | |
2769 | if (peek_op (")")) | |
2770 | { | |
2771 | next(); | |
2772 | break; | |
2773 | } | |
2774 | else if (peek_op (",")) | |
2775 | { | |
2776 | next(); | |
2777 | continue; | |
2778 | } | |
2779 | else | |
2780 | throw parse_error ("expected ',' or ')'"); | |
2781 | } | |
2782 | return f; | |
2783 | } | |
2784 | ||
2785 | else | |
2786 | { | |
2787 | sym = new symbol; | |
2788 | sym->name = name; | |
2789 | sym->tok = t; | |
d7f3e0c5 | 2790 | } |
0fefb486 | 2791 | } |
dff50e09 FCE |
2792 | |
2793 | // By now, either we had a hist_op in the first place, or else | |
d02548c0 GH |
2794 | // we had a plain word and it was converted to a symbol. |
2795 | ||
70c743d8 | 2796 | assert (!hop != !sym); // logical XOR |
d02548c0 GH |
2797 | |
2798 | // All that remains is to check for array indexing | |
2799 | ||
d7f3e0c5 | 2800 | if (peek_op ("[")) // array |
2f1a1aea FCE |
2801 | { |
2802 | next (); | |
2803 | struct arrayindex* ai = new arrayindex; | |
d02548c0 GH |
2804 | ai->tok = t; |
2805 | ||
2806 | if (hop) | |
2807 | ai->base = hop; | |
2808 | else | |
2809 | ai->base = sym; | |
2810 | ||
2f1a1aea FCE |
2811 | while (1) |
2812 | { | |
2813 | ai->indexes.push_back (parse_expression ()); | |
d7f3e0c5 | 2814 | if (peek_op ("]")) |
dff50e09 FCE |
2815 | { |
2816 | next(); | |
2817 | break; | |
d7f3e0c5 GH |
2818 | } |
2819 | else if (peek_op (",")) | |
2820 | { | |
2821 | next(); | |
2822 | continue; | |
2823 | } | |
2f1a1aea FCE |
2824 | else |
2825 | throw parse_error ("expected ',' or ']'"); | |
2826 | } | |
2827 | return ai; | |
2828 | } | |
d02548c0 GH |
2829 | |
2830 | // If we got to here, we *should* have a symbol; if we have | |
2831 | // a hist_op on its own, it doesn't count as an expression, | |
2832 | // so we throw a parse error. | |
2833 | ||
2834 | if (hop) | |
2835 | throw parse_error("base histogram operator where expression expected", t); | |
dff50e09 FCE |
2836 | |
2837 | return sym; | |
2f1a1aea | 2838 | } |
56099f08 | 2839 | |
81931eab | 2840 | |
30263a73 FCE |
2841 | // Parse a @cast or $var. Given head token has already been consumed. |
2842 | target_symbol* parser::parse_target_symbol (const token* t) | |
2843 | { | |
d48afc20 JS |
2844 | bool addressof = false; |
2845 | if (t->type == tok_operator && t->content == "&") | |
2846 | { | |
2847 | addressof = true; | |
2848 | t = next (); | |
2849 | } | |
2850 | ||
30263a73 FCE |
2851 | if (t->type == tok_identifier && t->content == "@cast") |
2852 | { | |
2853 | cast_op *cop = new cast_op; | |
2854 | cop->tok = t; | |
2855 | cop->base_name = t->content; | |
2856 | expect_op("("); | |
2857 | cop->operand = parse_expression (); | |
2858 | expect_op(","); | |
2859 | expect_unknown(tok_string, cop->type); | |
2860 | // types never start with "struct<space>" or "union<space>", | |
2861 | // so gobble it up. | |
60d98537 | 2862 | if (startswith(cop->type, "struct ")) |
30263a73 | 2863 | cop->type = cop->type.substr(7); |
60d98537 | 2864 | if (startswith(cop->type, "union ")) |
30263a73 FCE |
2865 | cop->type = cop->type.substr(6); |
2866 | if (peek_op (",")) | |
2867 | { | |
2868 | next(); | |
2869 | expect_unknown(tok_string, cop->module); | |
2870 | } | |
2871 | expect_op(")"); | |
2872 | parse_target_symbol_components(cop); | |
d48afc20 | 2873 | cop->addressof = addressof; |
30263a73 FCE |
2874 | return cop; |
2875 | } | |
2876 | ||
2877 | if (t->type == tok_identifier && t->content[0]=='$') | |
2878 | { | |
2879 | // target_symbol time | |
2880 | target_symbol *tsym = new target_symbol; | |
2881 | tsym->tok = t; | |
2882 | tsym->base_name = t->content; | |
2883 | parse_target_symbol_components(tsym); | |
d48afc20 | 2884 | tsym->addressof = addressof; |
30263a73 FCE |
2885 | return tsym; |
2886 | } | |
2887 | ||
2888 | throw parse_error ("expected @cast or $var"); | |
2889 | } | |
2890 | ||
2891 | ||
2892 | // Parse a @defined(). Given head token has already been consumed. | |
2893 | expression* parser::parse_defined_op (const token* t) | |
2894 | { | |
2895 | defined_op* dop = new defined_op; | |
2896 | dop->tok = t; | |
2897 | expect_op("("); | |
30263a73 | 2898 | // no need for parse_hist_op... etc., as @defined takes only target_symbols as its operand. |
d48afc20 | 2899 | const token* tt = next (); |
30263a73 FCE |
2900 | dop->operand = parse_target_symbol (tt); |
2901 | expect_op(")"); | |
2902 | return dop; | |
2903 | } | |
2904 | ||
2905 | ||
2906 | ||
81931eab JS |
2907 | void |
2908 | parser::parse_target_symbol_components (target_symbol* e) | |
2909 | { | |
5f36109e JS |
2910 | bool pprint = false; |
2911 | ||
2912 | // check for pretty-print in the form $foo$ | |
2913 | string &base = e->base_name; | |
2914 | size_t pprint_pos = base.find_last_not_of('$'); | |
2915 | if (0 < pprint_pos && pprint_pos < base.length() - 1) | |
2916 | { | |
2917 | string pprint_val = base.substr(pprint_pos + 1); | |
2918 | base.erase(pprint_pos + 1); | |
2919 | e->components.push_back (target_symbol::component(e->tok, pprint_val, true)); | |
2920 | pprint = true; | |
2921 | } | |
2922 | ||
2923 | while (!pprint) | |
81931eab | 2924 | { |
81931eab JS |
2925 | if (peek_op ("->")) |
2926 | { | |
c67847a0 JS |
2927 | const token* t = next(); |
2928 | string member; | |
2929 | expect_ident_or_keyword (member); | |
5f36109e JS |
2930 | |
2931 | // check for pretty-print in the form $foo->$ or $foo->bar$ | |
2932 | pprint_pos = member.find_last_not_of('$'); | |
2933 | string pprint_val; | |
2934 | if (pprint_pos == string::npos || pprint_pos < member.length() - 1) | |
2935 | { | |
2936 | pprint_val = member.substr(pprint_pos + 1); | |
2937 | member.erase(pprint_pos + 1); | |
2938 | pprint = true; | |
2939 | } | |
2940 | ||
2941 | if (!member.empty()) | |
2942 | e->components.push_back (target_symbol::component(t, member)); | |
2943 | if (pprint) | |
2944 | e->components.push_back (target_symbol::component(t, pprint_val, true)); | |
81931eab JS |
2945 | } |
2946 | else if (peek_op ("[")) | |
2947 | { | |
c67847a0 | 2948 | const token* t = next(); |
6fda2dff JS |
2949 | expression* index = parse_expression(); |
2950 | literal_number* ln = dynamic_cast<literal_number*>(index); | |
2951 | if (ln) | |
2952 | e->components.push_back (target_symbol::component(t, ln->value)); | |
2953 | else | |
2954 | e->components.push_back (target_symbol::component(t, index)); | |
81931eab | 2955 | expect_op ("]"); |
81931eab JS |
2956 | } |
2957 | else | |
2958 | break; | |
2959 | } | |
5f36109e JS |
2960 | |
2961 | if (!pprint) | |
2962 | { | |
2963 | // check for pretty-print in the form $foo $ | |
2964 | // i.e. as a separate token, esp. for $foo[i]$ and @cast(...)$ | |
2965 | const token* t = peek(); | |
2966 | if (t->type == tok_identifier && | |
2967 | t->content.find_first_not_of('$') == string::npos) | |
2968 | { | |
2969 | t = next(); | |
2970 | e->components.push_back (target_symbol::component(t, t->content, true)); | |
2971 | pprint = true; | |
2972 | } | |
2973 | } | |
2974 | ||
2975 | if (pprint && (peek_op ("->") || peek_op("["))) | |
2976 | throw parse_error("can't dereference after pretty-printing"); | |
81931eab JS |
2977 | } |
2978 | ||
73267b89 | 2979 | /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ |