1 // recursive descent parser for systemtap scripts
2 // Copyright (C) 2005-2009 Red Hat Inc.
3 // Copyright (C) 2006 Intel Corporation.
4 // Copyright (C) 2007 Bull S.A.S
6 // This file is part of systemtap, and is free software. You can
7 // redistribute it and/or modify it under the terms of the GNU General
8 // Public License (GPL); either version 2, or (at your option) any
36 // ------------------------------------------------------------------------
40 parser::parser (systemtap_session
& s
, istream
& i
, bool p
):
42 input_name ("<input>"), free_input (0),
43 input (i
, input_name
, s
), privileged (p
),
44 context(con_unknown
), last_t (0), next_t (0), num_errors (0)
47 parser::parser (systemtap_session
& s
, const string
& fn
, bool p
):
49 input_name (fn
), free_input (new ifstream (input_name
.c_str(), ios::in
)),
50 input (* free_input
, input_name
, s
), privileged (p
),
51 context(con_unknown
), last_t (0), next_t (0), num_errors (0)
56 if (free_input
) delete free_input
;
61 parser::parse (systemtap_session
& s
, std::istream
& i
, bool pr
)
69 parser::parse (systemtap_session
& s
, const std::string
& n
, bool pr
)
80 case tok_junk
: return "junk";
81 case tok_identifier
: return "identifier";
82 case tok_operator
: return "operator";
83 case tok_string
: return "string";
84 case tok_number
: return "number";
85 case tok_embedded
: return "embedded-code";
86 case tok_keyword
: return "keyword";
88 return "unknown token";
92 operator << (ostream
& o
, const source_loc
& loc
)
102 operator << (ostream
& o
, const token
& t
)
106 if (t
.type
!= tok_embedded
&& t
.type
!= tok_keyword
) // XXX: other types?
109 for (unsigned i
=0; i
<t
.content
.length(); i
++)
111 char c
= t
.content
[i
];
112 o
<< (isprint (c
) ? c
: '?');
125 parser::print_error (const parse_error
&pe
)
127 string
align_parse_error (" ");
128 cerr
<< "parse error: " << pe
.what () << endl
;
132 cerr
<< "\tat: " << *pe
.tok
<< endl
;
133 session
.print_error_source (cerr
, align_parse_error
, pe
.tok
);
137 const token
* t
= last_t
;
140 cerr
<< "\tsaw: " << *t
<< endl
;
141 session
.print_error_source (cerr
, align_parse_error
, t
);
144 cerr
<< "\tsaw: " << input_name
<< " EOF" << endl
;
147 // XXX: make it possible to print the last input line,
148 // so as to line up an arrow with the specific error column
162 template <typename OPERAND
>
163 bool eval_comparison (const OPERAND
& lhs
, const token
* op
, const OPERAND
& rhs
)
165 if (op
->type
== tok_operator
&& op
->content
== "<=")
166 { return lhs
<= rhs
; }
167 else if (op
->type
== tok_operator
&& op
->content
== ">=")
168 { return lhs
>= rhs
; }
169 else if (op
->type
== tok_operator
&& op
->content
== "<")
170 { return lhs
< rhs
; }
171 else if (op
->type
== tok_operator
&& op
->content
== ">")
172 { return lhs
> rhs
; }
173 else if (op
->type
== tok_operator
&& op
->content
== "==")
174 { return lhs
== rhs
; }
175 else if (op
->type
== tok_operator
&& op
->content
== "!=")
176 { return lhs
!= rhs
; }
178 throw parse_error ("expected comparison operator", op
);
182 // Here, we perform on-the-fly preprocessing.
183 // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
184 // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
185 // or: arch COMPARISON-OP "arch-string"
186 // or: "string1" COMPARISON-OP "string2"
187 // or: number1 COMPARISON-OP number2
188 // The %: ELSE-TOKENS part is optional.
190 // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
191 // e.g. %( arch != "i?86" %? "foo" %: "baz" %)
193 // Up to an entire %( ... %) expression is processed by a single call
194 // to this function. Tokens included by any nested conditions are
195 // enqueued in a private vector.
197 bool eval_pp_conditional (systemtap_session
& s
,
198 const token
* l
, const token
* op
, const token
* r
)
200 if (l
->type
== tok_identifier
&& (l
->content
== "kernel_v" ||
201 l
->content
== "kernel_vr"))
203 string target_kernel_vr
= s
.kernel_release
;
204 string target_kernel_v
= s
.kernel_base_release
;
206 if (! (r
->type
== tok_string
))
207 throw parse_error ("expected string literal", r
);
209 string target
= (l
->content
== "kernel_vr" ?
210 target_kernel_vr
.c_str() :
211 target_kernel_v
.c_str());
212 string query
= r
->content
;
213 bool rhs_wildcard
= (strpbrk (query
.c_str(), "*?[") != 0);
215 // collect acceptable strverscmp results.
216 int rvc_ok1
, rvc_ok2
;
218 if (op
->type
== tok_operator
&& op
->content
== "<=")
219 { rvc_ok1
= -1; rvc_ok2
= 0; }
220 else if (op
->type
== tok_operator
&& op
->content
== ">=")
221 { rvc_ok1
= 1; rvc_ok2
= 0; }
222 else if (op
->type
== tok_operator
&& op
->content
== "<")
223 { rvc_ok1
= -1; rvc_ok2
= -1; }
224 else if (op
->type
== tok_operator
&& op
->content
== ">")
225 { rvc_ok1
= 1; rvc_ok2
= 1; }
226 else if (op
->type
== tok_operator
&& op
->content
== "==")
227 { rvc_ok1
= 0; rvc_ok2
= 0; wc_ok
= true; }
228 else if (op
->type
== tok_operator
&& op
->content
== "!=")
229 { rvc_ok1
= -1; rvc_ok2
= 1; wc_ok
= true; }
231 throw parse_error ("expected comparison operator", op
);
233 if ((!wc_ok
) && rhs_wildcard
)
234 throw parse_error ("wildcard not allowed with order comparison operators", op
);
238 int rvc_result
= fnmatch (query
.c_str(), target
.c_str(),
239 FNM_NOESCAPE
); // spooky
240 bool badness
= (rvc_result
== 0) ^ (op
->content
== "==");
245 int rvc_result
= strverscmp (target
.c_str(), query
.c_str());
246 // normalize rvc_result
247 if (rvc_result
< 0) rvc_result
= -1;
248 if (rvc_result
> 0) rvc_result
= 1;
249 return (rvc_result
== rvc_ok1
|| rvc_result
== rvc_ok2
);
252 else if (l
->type
== tok_identifier
&& l
->content
== "arch")
254 string target_architecture
= s
.architecture
;
255 if (! (r
->type
== tok_string
))
256 throw parse_error ("expected string literal", r
);
257 string query_architecture
= r
->content
;
259 int nomatch
= fnmatch (query_architecture
.c_str(),
260 target_architecture
.c_str(),
261 FNM_NOESCAPE
); // still spooky
264 if (op
->type
== tok_operator
&& op
->content
== "==")
266 else if (op
->type
== tok_operator
&& op
->content
== "!=")
269 throw parse_error ("expected '==' or '!='", op
);
273 else if (l
->type
== tok_string
&& r
->type
== tok_string
)
275 string lhs
= l
->content
;
276 string rhs
= r
->content
;
277 return eval_comparison (lhs
, op
, rhs
);
278 // NB: no wildcarding option here
280 else if (l
->type
== tok_number
&& r
->type
== tok_number
)
282 int64_t lhs
= lex_cast
<int64_t>(l
->content
);
283 int64_t rhs
= lex_cast
<int64_t>(r
->content
);
284 return eval_comparison (lhs
, op
, rhs
);
285 // NB: no wildcarding option here
287 else if (l
->type
== tok_string
&& r
->type
== tok_number
288 && op
->type
== tok_operator
)
289 throw parse_error ("expected string literal as right value", r
);
290 else if (l
->type
== tok_number
&& r
->type
== tok_string
291 && op
->type
== tok_operator
)
292 throw parse_error ("expected number literal as right value", r
);
294 // XXX: support other forms? "CONFIG_SMP" ?
297 throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'\n"
298 " or comparison between strings or integers", l
);
302 // Only tokens corresponding to the TRUE statement must be expanded
304 parser::scan_pp (bool wildcard
)
308 if (enqueued_pp
.size() > 0)
310 const token
* t
= enqueued_pp
[0];
311 enqueued_pp
.erase (enqueued_pp
.begin());
315 const token
* t
= input
.scan (wildcard
); // NB: not recursive!
319 if (! (t
->type
== tok_operator
&& t
->content
== "%(")) // ordinary token
322 // We have a %( - it's time to throw a preprocessing party!
324 const token
*l
, *op
, *r
;
325 l
= input
.scan (false); // NB: not recursive, though perhaps could be
326 op
= input
.scan (false);
327 r
= input
.scan (false);
328 if (l
== 0 || op
== 0 || r
== 0)
329 throw parse_error ("incomplete condition after '%('", t
);
330 // NB: consider generalizing to consume all tokens until %?, and
331 // passing that as a vector to an evaluator.
333 // Do not evaluate the condition if we haven't expanded everything.
334 // This may occur when having several recursive conditionals.
335 bool result
= eval_pp_conditional (session
, l
, op
, r
);
341 clog << "PP eval (" << *t << ") == " << result << endl;
344 const token
*m
= input
.scan (); // NB: not recursive
345 if (! (m
&& m
->type
== tok_operator
&& m
->content
== "%?"))
346 throw parse_error ("expected '%?' marker for conditional", t
);
349 vector
<const token
*> my_enqueued_pp
;
352 while (true) // consume THEN tokens
356 m
= result
? scan_pp (wildcard
) : input
.scan (wildcard
);
358 catch (const parse_error
&e
)
360 if (result
) throw e
; // propagate errors if THEN branch taken
364 if (m
&& m
->type
== tok_operator
&& m
->content
== "%(") // nested %(
366 if (nesting
== 0 && m
&& (m
->type
== tok_operator
&& (m
->content
== "%:" || // ELSE
367 m
->content
== "%)"))) // END
369 if (nesting
&& m
&& m
->type
== tok_operator
&& m
->content
== "%)") // nested %)
373 throw parse_error ("incomplete conditional - missing '%:' or '%)'", t
);
375 my_enqueued_pp
.push_back (m
);
377 delete m
; // do nothing, just dispose of unkept THEN token
382 if (m
&& m
->type
== tok_operator
&& m
->content
== "%:") // ELSE
390 m
= result
? input
.scan (wildcard
) : scan_pp (wildcard
);
392 catch (const parse_error
& e
)
394 if (!result
) throw e
; // propagate errors if ELSE branch taken
398 if (m
&& m
->type
== tok_operator
&& m
->content
== "%(") // nested %(
400 if (nesting
== 0 && m
&& m
->type
== tok_operator
&& m
->content
== "%)") // END
402 if (nesting
&& m
&& m
->type
== tok_operator
&& m
->content
== "%)") // nested %)
406 throw parse_error ("incomplete conditional - missing %)", t
);
408 my_enqueued_pp
.push_back (m
);
410 delete m
; // do nothing, just dispose of unkept ELSE token
417 clog << "PP eval (" << *t << ") == " << result << " tokens: " << endl;
418 for (unsigned k=0; k<my_enqueued_pp.size(); k++)
419 clog << * my_enqueued_pp[k] << endl;
427 // NB: we transcribe the retained tokens here, and not inside
428 // the THEN/ELSE while loops. If it were done there, each loop
429 // would become infinite (each iteration consuming an ordinary
430 // token the previous one just pushed there). Guess how I
432 enqueued_pp
.insert (enqueued_pp
.end(),
433 my_enqueued_pp
.begin(),
434 my_enqueued_pp
.end());
436 // Go back to outermost while(true) loop. We hope that at least
437 // some THEN or ELSE tokens were enqueued. If not, around we go
444 parser::next (bool wildcard
)
447 next_t
= scan_pp (wildcard
);
449 throw parse_error ("unexpected end-of-file");
452 // advance by zeroing next_t
459 parser::peek (bool wildcard
)
462 next_t
= scan_pp (wildcard
);
464 // don't advance by zeroing next_t
471 tok_is(token
const * t
, token_type tt
, string
const & expected
)
473 return t
&& t
->type
== tt
&& t
->content
== expected
;
478 parser::expect_known (token_type tt
, string
const & expected
)
480 const token
*t
= next();
481 if (! (t
&& t
->type
== tt
&& t
->content
== expected
))
482 throw parse_error ("expected '" + expected
+ "'");
488 parser::expect_unknown (token_type tt
, string
& target
)
490 const token
*t
= next();
491 if (!(t
&& t
->type
== tt
))
492 throw parse_error ("expected " + tt2str(tt
));
499 parser::expect_unknown2 (token_type tt1
, token_type tt2
, string
& target
)
501 const token
*t
= next();
502 if (!(t
&& (t
->type
== tt1
|| t
->type
== tt2
)))
503 throw parse_error ("expected " + tt2str(tt1
) + " or " + tt2str(tt2
));
510 parser::expect_op (std::string
const & expected
)
512 return expect_known (tok_operator
, expected
);
517 parser::expect_kw (std::string
const & expected
)
519 return expect_known (tok_identifier
, expected
);
523 parser::expect_number (int64_t & value
)
526 const token
*t
= next();
527 if (t
->type
== tok_operator
&& t
->content
== "-")
532 if (!(t
&& t
->type
== tok_number
))
533 throw parse_error ("expected number");
535 const char* startp
= t
->content
.c_str ();
536 char* endp
= (char*) startp
;
538 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
539 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
540 // since the lexer only gives us positive digit strings, but we'll
541 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
543 value
= (int64_t) strtoull (startp
, & endp
, 0);
544 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
545 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
546 || (unsigned long long) value
> 18446744073709551615ULL
547 || value
< -9223372036854775807LL-1)
548 throw parse_error ("number invalid or out of range");
558 parser::expect_ident (std::string
& target
)
560 return expect_unknown (tok_identifier
, target
);
565 parser::expect_ident_or_keyword (std::string
& target
)
567 return expect_unknown2 (tok_identifier
, tok_keyword
, target
);
572 parser::peek_op (std::string
const & op
)
574 return tok_is (peek(), tok_operator
, op
);
579 parser::peek_kw (std::string
const & kw
)
581 return tok_is (peek(), tok_identifier
, kw
);
586 lexer::lexer (istream
& i
, const string
& in
, systemtap_session
& s
):
587 input (i
), input_name (in
), input_contents (""),
588 input_pointer (0), cursor_suspend_count(0),
589 cursor_line (1), cursor_column (1), session(s
),
594 input_contents
.push_back(c
);
598 lexer::get_input_contents ()
600 return input_contents
;
604 lexer::set_current_file (stapfile
* f
)
610 lexer::input_peek (unsigned n
)
612 if (input_contents
.size() > (input_pointer
+ n
))
613 return (int)(unsigned char)input_contents
[input_pointer
+n
];
622 int c
= input_peek (0);
625 if (c
< 0) return c
; // EOF
627 if (cursor_suspend_count
)
628 // Track effect of input_put: preserve previous cursor/line_column
629 // until all of its characters are consumed.
630 cursor_suspend_count
--;
633 // update source cursor
643 // clog << "[" << (char)c << "]";
649 lexer::input_put (const string
& chars
)
651 // clog << "[put:" << chars << " @" << input_pointer << "]";
652 input_contents
.insert (input_contents
.begin() + input_pointer
, chars
.begin(), chars
.end());
653 cursor_suspend_count
+= chars
.size();
658 lexer::scan (bool wildcard
)
660 token
* n
= new token
;
661 n
->location
.file
= input_name
;
663 n
->location
.stap_file
= current_file
;
665 unsigned semiskipped_p
= 0;
668 n
->location
.line
= cursor_line
;
669 n
->location
.column
= cursor_column
;
672 if (semiskipped_p
> 1)
675 throw parse_error ("invalid nested substitution of command line arguments");
679 int c2
= input_peek ();
680 // clog << "{" << (char)c << (char)c2 << "}";
690 // Paste command line arguments as character streams into
691 // the beginning of a token. $1..$999 go through as raw
692 // characters; @1..@999 are quoted/escaped as strings.
693 // $# and @# expand to the number of arguments, similarly
695 if ((c
== '$' || c
== '@') &&
698 input_get(); // swallow '#'
699 stringstream converter
;
700 converter
<< session
.args
.size ();
701 if (c
== '$') input_put (converter
.str());
702 else input_put (lex_cast_qstring (converter
.str()));
706 else if ((c
== '$' || c
== '@') &&
713 idx
= (idx
* 10) + (c2
- '0');
717 idx
<= session
.args
.size()); // prevent overflow
719 idx
-1 >= session
.args
.size())
720 throw parse_error ("command line argument index " + lex_cast
<string
>(idx
)
721 + " out of range [1-" + lex_cast
<string
>(session
.args
.size()) + "]", n
);
723 string arg
= session
.args
[idx
-1];
724 if (c
== '$') input_put (arg
);
725 else input_put (lex_cast_qstring (arg
));
730 else if (isalpha (c
) || c
== '$' || c
== '@' || c
== '_' ||
731 (wildcard
&& c
== '*'))
733 n
->type
= tok_identifier
;
734 n
->content
= (char) c
;
735 while (isalnum (c2
) || c2
== '_' || c2
== '$' ||
736 (wildcard
&& c2
== '*'))
739 n
->content
.push_back (c2
);
743 if (n
->content
== "probe"
744 || n
->content
== "global"
745 || n
->content
== "function"
746 || n
->content
== "if"
747 || n
->content
== "else"
748 || n
->content
== "for"
749 || n
->content
== "foreach"
750 || n
->content
== "in"
751 || n
->content
== "limit"
752 || n
->content
== "return"
753 || n
->content
== "delete"
754 || n
->content
== "while"
755 || n
->content
== "break"
756 || n
->content
== "continue"
757 || n
->content
== "next"
758 || n
->content
== "string"
759 || n
->content
== "long")
760 n
->type
= tok_keyword
;
765 else if (isdigit (c
)) // positive literal
767 n
->type
= tok_number
;
768 n
->content
= (char) c
;
772 int c2
= input_peek ();
776 // NB: isalnum is very permissive. We rely on strtoull, called in
777 // parser::parse_literal below, to confirm that the number string
778 // is correctly formatted and in range.
782 n
->content
.push_back (c2
);
793 n
->type
= tok_string
;
798 if (c
< 0 || c
== '\n')
803 if (c
== '\"') // closing double-quotes
805 else if (c
== '\\') // see also input_put
817 case '0' ... '7': // NB: need only match the first digit
819 // Pass these escapes through to the string value
820 // being parsed; it will be emitted into a C literal.
822 n
->content
.push_back('\\');
826 n
->content
.push_back(c
);
831 n
->content
.push_back(c
);
836 else if (ispunct (c
))
838 int c2
= input_peek ();
839 int c3
= input_peek (1);
840 string s1
= string("") + (char) c
;
841 string s2
= (c2
> 0 ? s1
+ (char) c2
: s1
);
842 string s3
= (c3
> 0 ? s2
+ (char) c3
: s2
);
844 // NB: if we were to recognize negative numeric literals here,
845 // we'd introduce another grammar ambiguity:
846 // 1-1 would be parsed as tok_number(1) and tok_number(-1)
847 // instead of tok_number(1) tok_operator('-') tok_number(1)
849 if (s1
== "#") // shell comment
851 unsigned this_line
= cursor_line
;
852 do { c
= input_get (); }
853 while (c
>= 0 && cursor_line
== this_line
);
856 else if (s2
== "//") // C++ comment
858 unsigned this_line
= cursor_line
;
859 do { c
= input_get (); }
860 while (c
>= 0 && cursor_line
== this_line
);
863 else if (c
== '/' && c2
== '*') // C comment
869 chars
++; // track this to prevent "/*/" from being accepted
872 if (chars
> 1 && c
== '*' && c2
== '/')
877 else if (c
== '%' && c2
== '{') // embedded code
879 n
->type
= tok_embedded
;
880 (void) input_get (); // swallow '{' already in c2
894 (void) input_get (); // swallow '}' too
903 // We're committed to recognizing at least the first character
905 n
->type
= tok_operator
;
907 // match all valid operators, in decreasing size order
913 input_get (); input_get (); // swallow other two characters
915 else if (s2
== "==" ||
935 // preprocessor tokens
942 input_get (); // swallow other character
955 n
->content
= (char) c
;
961 // ------------------------------------------------------------------------
966 stapfile
* f
= new stapfile
;
967 input
.set_current_file (f
);
968 f
->file_contents
= input
.get_input_contents ();
969 f
->name
= input_name
;
977 const token
* t
= peek ();
978 if (! t
) // nice clean EOF
982 if (t
->type
== tok_keyword
&& t
->content
== "probe")
985 parse_probe (f
->probes
, f
->aliases
);
987 else if (t
->type
== tok_keyword
&& t
->content
== "global")
989 context
= con_global
;
990 parse_global (f
->globals
, f
->probes
);
992 else if (t
->type
== tok_keyword
&& t
->content
== "function")
994 context
= con_function
;
995 parse_functiondecl (f
->functions
);
997 else if (t
->type
== tok_embedded
)
999 context
= con_embedded
;
1000 f
->embeds
.push_back (parse_embeddedcode ());
1004 context
= con_unknown
;
1005 throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
1008 catch (parse_error
& pe
)
1011 if (pe
.skip_some
) // for recovery
1014 // Quietly swallow all tokens until the next '}'.
1017 const token
* t
= peek ();
1021 if (t
->type
== tok_operator
&& t
->content
== "}")
1025 catch (parse_error
& pe2
)
1027 // parse error during recovery ... ugh
1035 cerr
<< "Input file '" << input_name
<< "' is empty or missing." << endl
;
1037 input
.set_current_file (0);
1040 else if (num_errors
> 0)
1042 cerr
<< num_errors
<< " parse error(s)." << endl
;
1044 input
.set_current_file (0);
1048 input
.set_current_file (0);
1054 parser::parse_probe (std::vector
<probe
*> & probe_ret
,
1055 std::vector
<probe_alias
*> & alias_ret
)
1057 const token
* t0
= next ();
1058 if (! (t0
->type
== tok_keyword
&& t0
->content
== "probe"))
1059 throw parse_error ("expected 'probe'");
1061 vector
<probe_point
*> aliases
;
1062 vector
<probe_point
*> locations
;
1064 bool equals_ok
= true;
1066 int epilogue_alias
= 0;
1070 probe_point
* pp
= parse_probe_point ();
1072 const token
* t
= peek ();
1074 && t
->type
== tok_operator
&& t
->content
== "=")
1076 if (pp
->optional
|| pp
->sufficient
)
1077 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp
->tok
);
1078 aliases
.push_back(pp
);
1082 else if (equals_ok
&& t
1083 && t
->type
== tok_operator
&& t
->content
== "+=")
1085 if (pp
->optional
|| pp
->sufficient
)
1086 throw parse_error ("probe point alias name cannot be optional nor sufficient", pp
->tok
);
1087 aliases
.push_back(pp
);
1092 else if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
1094 locations
.push_back(pp
);
1099 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
1101 locations
.push_back(pp
);
1105 throw parse_error ("expected probe point specifier");
1108 if (aliases
.empty())
1110 probe
* p
= new probe
;
1112 p
->locations
= locations
;
1113 p
->body
= parse_stmt_block ();
1114 p
->privileged
= privileged
;
1115 probe_ret
.push_back (p
);
1119 probe_alias
* p
= new probe_alias (aliases
);
1121 p
->epilogue_style
= true;
1123 p
->epilogue_style
= false;
1125 p
->locations
= locations
;
1126 p
->body
= parse_stmt_block ();
1127 p
->privileged
= privileged
;
1128 alias_ret
.push_back (p
);
1134 parser::parse_embeddedcode ()
1136 embeddedcode
* e
= new embeddedcode
;
1137 const token
* t
= next ();
1138 if (t
->type
!= tok_embedded
)
1139 throw parse_error ("expected '%{'");
1142 throw parse_error ("embedded code in unprivileged script",
1143 false /* don't skip tokens for parse resumption */);
1146 e
->code
= t
->content
;
1152 parser::parse_stmt_block ()
1154 block
* pb
= new block
;
1156 const token
* t
= next ();
1157 if (! (t
->type
== tok_operator
&& t
->content
== "{"))
1158 throw parse_error ("expected '{'");
1167 if (t
&& t
->type
== tok_operator
&& t
->content
== "}")
1173 pb
->statements
.push_back (parse_statement ());
1175 catch (parse_error
& pe
)
1179 // Quietly swallow all tokens until the next ';' or '}'.
1182 const token
* t
= peek ();
1185 if (t
->type
== tok_operator
1186 && (t
->content
== "}" || t
->content
== ";"))
1197 parser::parse_statement ()
1199 const token
* t
= peek ();
1200 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
1202 null_statement
* n
= new null_statement ();
1206 else if (t
&& t
->type
== tok_operator
&& t
->content
== "{")
1207 return parse_stmt_block ();
1208 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
1209 return parse_if_statement ();
1210 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "for")
1211 return parse_for_loop ();
1212 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "foreach")
1213 return parse_foreach_loop ();
1214 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "return")
1215 return parse_return_statement ();
1216 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "delete")
1217 return parse_delete_statement ();
1218 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "while")
1219 return parse_while_loop ();
1220 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "break")
1221 return parse_break_statement ();
1222 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "continue")
1223 return parse_continue_statement ();
1224 else if (t
&& t
->type
== tok_keyword
&& t
->content
== "next")
1225 return parse_next_statement ();
1226 // XXX: "do/while" statement?
1227 else if (t
&& (t
->type
== tok_operator
|| // expressions are flexible
1228 t
->type
== tok_identifier
||
1229 t
->type
== tok_number
||
1230 t
->type
== tok_string
))
1231 return parse_expr_statement ();
1232 // XXX: consider generally accepting tok_embedded here too
1234 throw parse_error ("expected statement");
1239 parser::parse_global (vector
<vardecl
*>& globals
, vector
<probe
*>&)
1241 const token
* t0
= next ();
1242 if (! (t0
->type
== tok_keyword
&& t0
->content
== "global"))
1243 throw parse_error ("expected 'global'");
1247 const token
* t
= next ();
1248 if (! (t
->type
== tok_identifier
))
1249 throw parse_error ("expected identifier");
1251 for (unsigned i
=0; i
<globals
.size(); i
++)
1252 if (globals
[i
]->name
== t
->content
)
1253 throw parse_error ("duplicate global name");
1255 vardecl
* d
= new vardecl
;
1256 d
->name
= t
->content
;
1258 globals
.push_back (d
);
1262 if (t
&& t
->type
== tok_operator
&& t
->content
== "[") // array size
1266 expect_number(size
);
1267 if (size
<= 0 || size
> 1000000) // arbitrary max
1268 throw parse_error("array size out of range");
1269 d
->maxsize
= (int)size
;
1270 expect_known(tok_operator
, "]");
1274 if (t
&& t
->type
== tok_operator
&& t
->content
== "=") // initialization
1276 if (!d
->compatible_arity(0))
1277 throw parse_error("only scalar globals can be initialized");
1280 d
->init
= parse_literal ();
1281 d
->type
= d
->init
->type
;
1285 if (t
&& t
->type
== tok_operator
&& t
->content
== ";") // termination
1288 if (t
&& t
->type
== tok_operator
&& t
->content
== ",") // next global
1300 parser::parse_functiondecl (std::vector
<functiondecl
*>& functions
)
1302 const token
* t
= next ();
1303 if (! (t
->type
== tok_keyword
&& t
->content
== "function"))
1304 throw parse_error ("expected 'function'");
1308 if (! (t
->type
== tok_identifier
)
1309 && ! (t
->type
== tok_keyword
1310 && (t
->content
== "string" || t
->content
== "long")))
1311 throw parse_error ("expected identifier");
1313 for (unsigned i
=0; i
<functions
.size(); i
++)
1314 if (functions
[i
]->name
== t
->content
)
1315 throw parse_error ("duplicate function name");
1317 functiondecl
*fd
= new functiondecl ();
1318 fd
->name
= t
->content
;
1322 if (t
->type
== tok_operator
&& t
->content
== ":")
1325 if (t
->type
== tok_keyword
&& t
->content
== "string")
1326 fd
->type
= pe_string
;
1327 else if (t
->type
== tok_keyword
&& t
->content
== "long")
1329 else throw parse_error ("expected 'string' or 'long'");
1334 if (! (t
->type
== tok_operator
&& t
->content
== "("))
1335 throw parse_error ("expected '('");
1341 // permit zero-argument functions
1342 if (t
->type
== tok_operator
&& t
->content
== ")")
1344 else if (! (t
->type
== tok_identifier
))
1345 throw parse_error ("expected identifier");
1346 vardecl
* vd
= new vardecl
;
1347 vd
->name
= t
->content
;
1349 fd
->formal_args
.push_back (vd
);
1352 if (t
->type
== tok_operator
&& t
->content
== ":")
1355 if (t
->type
== tok_keyword
&& t
->content
== "string")
1356 vd
->type
= pe_string
;
1357 else if (t
->type
== tok_keyword
&& t
->content
== "long")
1359 else throw parse_error ("expected 'string' or 'long'");
1363 if (t
->type
== tok_operator
&& t
->content
== ")")
1365 if (t
->type
== tok_operator
&& t
->content
== ",")
1368 throw parse_error ("expected ',' or ')'");
1372 if (t
&& t
->type
== tok_embedded
)
1373 fd
->body
= parse_embeddedcode ();
1375 fd
->body
= parse_stmt_block ();
1377 functions
.push_back (fd
);
1382 parser::parse_probe_point ()
1384 probe_point
* pl
= new probe_point
;
1388 const token
* t
= next (true); // wildcard scanning here
1389 if (! (t
->type
== tok_identifier
1390 // we must allow ".return" and ".function", which are keywords
1391 || t
->type
== tok_keyword
))
1392 throw parse_error ("expected identifier or '*'");
1394 if (pl
->tok
== 0) pl
->tok
= t
;
1396 probe_point::component
* c
= new probe_point::component
;
1397 c
->functor
= t
->content
;
1398 pl
->components
.push_back (c
);
1399 // NB we may add c->arg soon
1403 // consume optional parameter
1404 if (t
&& t
->type
== tok_operator
&& t
->content
== "(")
1406 next (); // consume "("
1407 c
->arg
= parse_literal ();
1410 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
1411 throw parse_error ("expected ')'");
1416 if (t
&& t
->type
== tok_operator
&& t
->content
== ".")
1422 // We only fall through here at the end of a probe point (past
1423 // all the dotted/parametrized components).
1425 if (t
&& t
->type
== tok_operator
&&
1426 (t
->content
== "?" || t
->content
== "!"))
1428 pl
->optional
= true;
1429 if (t
->content
== "!") pl
->sufficient
= true;
1430 // NB: sufficient implies optional
1436 if (t
&& t
->type
== tok_keyword
&& t
->content
== "if")
1440 if (t
&& ! (t
->type
== tok_operator
&& t
->content
== "("))
1441 throw parse_error ("expected '('");
1444 pl
->condition
= parse_expression ();
1447 if (t
&& ! (t
->type
== tok_operator
&& t
->content
== ")"))
1448 throw parse_error ("expected ')'");
1454 if (t
&& t
->type
== tok_operator
1455 && (t
->content
== "{" || t
->content
== "," ||
1456 t
->content
== "=" || t
->content
== "+=" ))
1459 throw parse_error ("expected one of '. , ( ? ! { = +='");
1467 parser::parse_literal ()
1469 const token
* t
= next ();
1471 if (t
->type
== tok_string
)
1472 l
= new literal_string (t
->content
);
1476 if (t
->type
== tok_operator
&& t
->content
== "-")
1482 if (t
->type
== tok_number
)
1484 const char* startp
= t
->content
.c_str ();
1485 char* endp
= (char*) startp
;
1487 // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1488 // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1489 // since the lexer only gives us positive digit strings, but we'll
1490 // limit it to LLONG_MIN when a '-' operator is fed into the literal.
1492 long long value
= (long long) strtoull (startp
, & endp
, 0);
1493 if (errno
== ERANGE
|| errno
== EINVAL
|| *endp
!= '\0'
1494 || (neg
&& (unsigned long long) value
> 9223372036854775808ULL)
1495 || (unsigned long long) value
> 18446744073709551615ULL
1496 || value
< -9223372036854775807LL-1)
1497 throw parse_error ("number invalid or out of range");
1502 l
= new literal_number (value
);
1505 throw parse_error ("expected literal string or number");
1514 parser::parse_if_statement ()
1516 const token
* t
= next ();
1517 if (! (t
->type
== tok_keyword
&& t
->content
== "if"))
1518 throw parse_error ("expected 'if'");
1519 if_statement
* s
= new if_statement
;
1523 if (! (t
->type
== tok_operator
&& t
->content
== "("))
1524 throw parse_error ("expected '('");
1526 s
->condition
= parse_expression ();
1529 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
1530 throw parse_error ("expected ')'");
1532 s
->thenblock
= parse_statement ();
1535 if (t
&& t
->type
== tok_keyword
&& t
->content
== "else")
1538 s
->elseblock
= parse_statement ();
1541 s
->elseblock
= 0; // in case not otherwise initialized
1548 parser::parse_expr_statement ()
1550 expr_statement
*es
= new expr_statement
;
1551 const token
* t
= peek ();
1553 es
->value
= parse_expression ();
1559 parser::parse_return_statement ()
1561 const token
* t
= next ();
1562 if (! (t
->type
== tok_keyword
&& t
->content
== "return"))
1563 throw parse_error ("expected 'return'");
1564 if (context
!= con_function
)
1565 throw parse_error ("found 'return' not in function context");
1566 return_statement
* s
= new return_statement
;
1568 s
->value
= parse_expression ();
1574 parser::parse_delete_statement ()
1576 const token
* t
= next ();
1577 if (! (t
->type
== tok_keyword
&& t
->content
== "delete"))
1578 throw parse_error ("expected 'delete'");
1579 delete_statement
* s
= new delete_statement
;
1581 s
->value
= parse_expression ();
1587 parser::parse_next_statement ()
1589 const token
* t
= next ();
1590 if (! (t
->type
== tok_keyword
&& t
->content
== "next"))
1591 throw parse_error ("expected 'next'");
1592 if (context
!= con_probe
)
1593 throw parse_error ("found 'next' not in probe context");
1594 next_statement
* s
= new next_statement
;
// Parse a `break` statement into a new break_statement.
// Throws parse_error if the next token is not the `break` keyword.
1601 parser::parse_break_statement ()
1603 const token
* t
= next ();
1604 if (! (t
->type
== tok_keyword
&& t
->content
== "break"))
1605 throw parse_error ("expected 'break'");
1606 break_statement
* s
= new break_statement
;
// Parse a `continue` statement into a new continue_statement.
// Throws parse_error if the next token is not the `continue` keyword.
1613 parser::parse_continue_statement ()
1615 const token
* t
= next ();
1616 if (! (t
->type
== tok_keyword
&& t
->content
== "continue"))
1617 throw parse_error ("expected 'continue'");
1618 continue_statement
* s
= new continue_statement
;
// Parse a C-style `for (init; cond; incr) statement` loop into a new
// for_loop node.  Before parsing each header clause the code tests
// whether the clause's terminator (';' or ')') follows immediately, so
// each clause may be left empty.  Throws parse_error when 'for', '(',
// a ';' separator or ')' is missing.
1625 parser::parse_for_loop ()
1627 const token
* t
= next ();
1628 if (! (t
->type
== tok_keyword
&& t
->content
== "for"))
1629 throw parse_error ("expected 'for'");
1630 for_loop
* s
= new for_loop
;
1634 if (! (t
->type
== tok_operator
&& t
->content
== "("))
1635 throw parse_error ("expected '('");
1637 // initializer + ";"
1639 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
// Non-empty initializer: parsed as an expression statement.
1646 s
->init
= parse_expr_statement ();
1648 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
1649 throw parse_error ("expected ';'");
// condition + ";" -- an empty condition is handled with a literal 1.
1654 if (t
&& t
->type
== tok_operator
&& t
->content
== ";")
1656 literal_number
* l
= new literal_number(1);
1658 s
->cond
->tok
= next ();
1662 s
->cond
= parse_expression ();
1664 if (! (t
->type
== tok_operator
&& t
->content
== ";"))
1665 throw parse_error ("expected ';'");
// increment + ")"
1670 if (t
&& t
->type
== tok_operator
&& t
->content
== ")")
1677 s
->incr
= parse_expr_statement ();
1679 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
1680 throw parse_error ("expected ')'");
// loop body
1684 s
->block
= parse_statement ();
// Parse `while (cond) statement`.  The loop is represented with a
// for_loop node rather than a dedicated while type; only its cond and
// block fields are filled from the source text.  Throws parse_error when
// 'while', '(' or ')' is missing.
1691 parser::parse_while_loop ()
1693 const token
* t
= next ();
1694 if (! (t
->type
== tok_keyword
&& t
->content
== "while"))
1695 throw parse_error ("expected 'while'");
1696 for_loop
* s
= new for_loop
;
1700 if (! (t
->type
== tok_operator
&& t
->content
== "("))
1701 throw parse_error ("expected '('");
1703 // dummy init and incr fields
1708 s
->cond
= parse_expression ();
1711 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
1712 throw parse_error ("expected ')'");
1715 s
->block
= parse_statement ();
// Parse a `foreach` loop into a new foreach_loop node.  Accepted shapes,
// as far as the checks below show:
//   foreach (idx in array) statement
//   foreach ([idx1, idx2, ...] in array) statement
// A '+' or '-' after an index name or after the array sets
// sort_direction to 1 or -1; a second such directive is rejected with
// "multiple sort directives".  An optional `limit <expression>` may
// appear before the closing ')'.
1722 parser::parse_foreach_loop ()
1724 const token
* t
= next ();
1725 if (! (t
->type
== tok_keyword
&& t
->content
== "foreach"))
1726 throw parse_error ("expected 'foreach'");
1727 foreach_loop
* s
= new foreach_loop
;
// 0 means no sort directive has been seen yet.
1729 s
->sort_direction
= 0;
1733 if (! (t
->type
== tok_operator
&& t
->content
== "("))
1734 throw parse_error ("expected '('");
1736 // see also parse_array_in
// Despite the name, `parenthesized` records that the index list was
// opened with '['.
1738 bool parenthesized
= false;
1740 if (t
&& t
->type
== tok_operator
&& t
->content
== "[")
1743 parenthesized
= true;
// Each index must be a plain identifier; it becomes a symbol in
// s->indexes.
1749 if (! (t
->type
== tok_identifier
))
1750 throw parse_error ("expected identifier");
1751 symbol
* sym
= new symbol
;
1753 sym
->name
= t
->content
;
1754 s
->indexes
.push_back (sym
);
// Optional sort directive attached to the index just pushed.
1757 if (t
&& t
->type
== tok_operator
&&
1758 (t
->content
== "+" || t
->content
== "-"))
1760 if (s
->sort_direction
)
1761 throw parse_error ("multiple sort directives");
1762 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
// Index count after the push, i.e. the 1-based position of this index.
1763 s
->sort_column
= s
->indexes
.size();
1770 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
1775 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
1781 throw parse_error ("expected ',' or ']'");
1784 break; // expecting only one expression
1788 if (! (t
->type
== tok_keyword
&& t
->content
== "in"))
1789 throw parse_error ("expected 'in'");
// The thing being iterated over.
1791 s
->base
= parse_indexable();
// Optional sort directive attached to the array itself.
1794 if (t
&& t
->type
== tok_operator
&&
1795 (t
->content
== "+" || t
->content
== "-"))
1797 if (s
->sort_direction
)
1798 throw parse_error ("multiple sort directives");
1799 s
->sort_direction
= (t
->content
== "+") ? 1 : -1;
1805 if (tok_is(t
, tok_keyword
, "limit"))
1807 next (); // get past the "limit"
1808 s
->limit
= parse_expression ();
1812 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
1813 throw parse_error ("expected ')'");
1815 s
->block
= parse_statement ();
// Entry point for parsing any expression; assignment is the
// lowest-precedence level, so this simply delegates to parse_assignment().
1821 parser::parse_expression ()
1823 return parse_assignment ();
// Parse an assignment-level expression.  The left operand comes from
// parse_ternary(); if one of the assignment operators tested below
// follows, an assignment node is built whose right side is parsed with
// parse_expression(), which makes chained assignments group to the right.
1828 parser::parse_assignment ()
1830 expression
* op1
= parse_ternary ();
1832 const token
* t
= peek ();
1833 // right-associative operators
1834 if (t
&& t
->type
== tok_operator
1835 && (t
->content
== "=" ||
1836 t
->content
== "<<<" ||
1837 t
->content
== "+=" ||
1838 t
->content
== "-=" ||
1839 t
->content
== "*=" ||
1840 t
->content
== "/=" ||
1841 t
->content
== "%=" ||
1842 t
->content
== "<<=" ||
1843 t
->content
== ">>=" ||
1844 t
->content
== "&=" ||
1845 t
->content
== "^=" ||
1846 t
->content
== "|=" ||
1847 t
->content
== ".=" ||
1850 // NB: lvalueness is checked during elaboration / translation
1851 assignment
* e
= new assignment
;
1856 e
->right
= parse_expression ();
// Parse `cond ? a : b`.  The condition operand comes from
// parse_logical_or(); when a '?' follows, a ternary_expression is built
// and both branches are parsed with parse_expression().  Throws
// parse_error if the ':' separator is missing.
1865 parser::parse_ternary ()
1867 expression
* op1
= parse_logical_or ();
1869 const token
* t
= peek ();
1870 if (t
&& t
->type
== tok_operator
&& t
->content
== "?")
1872 ternary_expression
* e
= new ternary_expression
;
1876 e
->truevalue
= parse_expression (); // XXX
1879 if (! (t
->type
== tok_operator
&& t
->content
== ":"))
1880 throw parse_error ("expected ':'");
1882 e
->falsevalue
= parse_expression (); // XXX
// Parse a chain of `||` operators.  Operands are parsed with
// parse_logical_and(); the loop builds a logical_or_expr for every "||"
// encountered.
1891 parser::parse_logical_or ()
1893 expression
* op1
= parse_logical_and ();
1895 const token
* t
= peek ();
1896 while (t
&& t
->type
== tok_operator
&& t
->content
== "||")
1898 logical_or_expr
* e
= new logical_or_expr
;
1903 e
->right
= parse_logical_and ();
// Parse a chain of `&&` operators.  Operands are parsed with
// parse_boolean_or(); the loop builds a logical_and_expr for every "&&"
// encountered.
1913 parser::parse_logical_and ()
1915 expression
* op1
= parse_boolean_or ();
1917 const token
* t
= peek ();
1918 while (t
&& t
->type
== tok_operator
&& t
->content
== "&&")
1920 logical_and_expr
*e
= new logical_and_expr
;
1925 e
->right
= parse_boolean_or ();
// Parse a chain of bitwise `|` operators.  Operands are parsed with
// parse_boolean_xor(); each "|" produces a binary_expression.
1935 parser::parse_boolean_or ()
1937 expression
* op1
= parse_boolean_xor ();
1939 const token
* t
= peek ();
1940 while (t
&& t
->type
== tok_operator
&& t
->content
== "|")
1942 binary_expression
* e
= new binary_expression
;
1947 e
->right
= parse_boolean_xor ();
// Parse a chain of bitwise `^` operators.  Operands are parsed with
// parse_boolean_and(); each "^" produces a binary_expression.
1957 parser::parse_boolean_xor ()
1959 expression
* op1
= parse_boolean_and ();
1961 const token
* t
= peek ();
1962 while (t
&& t
->type
== tok_operator
&& t
->content
== "^")
1964 binary_expression
* e
= new binary_expression
;
1969 e
->right
= parse_boolean_and ();
// Parse a chain of bitwise `&` operators.  Operands are parsed with
// parse_array_in(); each "&" produces a binary_expression.
1979 parser::parse_boolean_and ()
1981 expression
* op1
= parse_array_in ();
1983 const token
* t
= peek ();
1984 while (t
&& t
->type
== tok_operator
&& t
->content
== "&")
1986 binary_expression
* e
= new binary_expression
;
1991 e
->right
= parse_array_in ();
// Parse the array-membership level: either an ordinary comparison-level
// expression, or `<index> in <array>` / `[i1, i2, ...] in <array>`.
// Index expressions are collected first; if the `in` keyword follows,
// they become the indexes of an arrayindex wrapped in an array_in node.
// Without `in`, exactly one collected expression is acceptable; a longer
// list raises parse_error.
2001 parser::parse_array_in ()
2003 // This is a very tricky case. All these are legit expressions:
2004 // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
2005 vector
<expression
*> indexes
;
// Despite the name, `parenthesized` records that the list was opened
// with '['.
2006 bool parenthesized
= false;
2008 const token
* t
= peek ();
2009 if (t
&& t
->type
== tok_operator
&& t
->content
== "[")
2012 parenthesized
= true;
// Each index is parsed one precedence level down, via parse_comparison().
2017 expression
* op1
= parse_comparison ();
2018 indexes
.push_back (op1
);
2022 const token
* t
= peek ();
2023 if (t
&& t
->type
== tok_operator
&& t
->content
== ",")
2028 else if (t
&& t
->type
== tok_operator
&& t
->content
== "]")
2034 throw parse_error ("expected ',' or ']'");
2037 break; // expecting only one expression
2041 if (t
&& t
->type
== tok_keyword
&& t
->content
== "in")
2043 array_in
*e
= new array_in
;
2045 next (); // swallow "in"
2047 arrayindex
* a
= new arrayindex
;
2048 a
->indexes
= indexes
;
2049 a
->base
= parse_indexable();
// The arrayindex borrows its location token from the array it indexes.
2050 a
->tok
= a
->base
->get_tok();
2054 else if (indexes
.size() == 1) // no "in" - need one expression only
2057 throw parse_error ("unexpected comma-separated expression list");
// Parse a chain of comparison operators (>, <, ==, !=, <=, >=).
// Operands are parsed with parse_shift(); each operator produces a
// comparison node.
2062 parser::parse_comparison ()
2064 expression
* op1
= parse_shift ();
2066 const token
* t
= peek ();
2067 while (t
&& t
->type
== tok_operator
2068 && (t
->content
== ">" ||
2069 t
->content
== "<" ||
2070 t
->content
== "==" ||
2071 t
->content
== "!=" ||
2072 t
->content
== "<=" ||
2073 t
->content
== ">="))
2075 comparison
* e
= new comparison
;
2080 e
->right
= parse_shift ();
// Parse a chain of shift operators (<< and >>).  Operands are parsed
// with parse_concatenation(); each operator produces a binary_expression.
2090 parser::parse_shift ()
2092 expression
* op1
= parse_concatenation ();
2094 const token
* t
= peek ();
2095 while (t
&& t
->type
== tok_operator
&&
2096 (t
->content
== "<<" || t
->content
== ">>"))
2098 binary_expression
* e
= new binary_expression
;
2103 e
->right
= parse_concatenation ();
// Parse a chain of string concatenations written with the "." operator.
// Operands are parsed with parse_additive(); each "." produces a
// concatenation node.
2113 parser::parse_concatenation ()
2115 expression
* op1
= parse_additive ();
2117 const token
* t
= peek ();
2118 // XXX: the actual awk string-concatenation operator is *whitespace*.
2119 // I don't know how to easily model that here.
2120 while (t
&& t
->type
== tok_operator
&& t
->content
== ".")
2122 concatenation
* e
= new concatenation
;
2127 e
->right
= parse_additive ();
// Parse a chain of binary + and - operators.  Operands are parsed with
// parse_multiplicative(); each operator produces a binary_expression.
2137 parser::parse_additive ()
2139 expression
* op1
= parse_multiplicative ();
2141 const token
* t
= peek ();
2142 while (t
&& t
->type
== tok_operator
2143 && (t
->content
== "+" || t
->content
== "-"))
2145 binary_expression
* e
= new binary_expression
;
2150 e
->right
= parse_multiplicative ();
// Parse a chain of *, / and % operators.  Operands are parsed with
// parse_unary(); each operator produces a binary_expression.
2160 parser::parse_multiplicative ()
2162 expression
* op1
= parse_unary ();
2164 const token
* t
= peek ();
2165 while (t
&& t
->type
== tok_operator
2166 && (t
->content
== "*" || t
->content
== "/" || t
->content
== "%"))
2168 binary_expression
* e
= new binary_expression
;
2173 e
->right
= parse_unary ();
// Parse a prefix unary operator (the tests below cover +, -, ! and ~)
// applied to a parse_crement() operand, producing a unary_expression.
// When no such operator is present the result is whatever
// parse_crement() returns.
2183 parser::parse_unary ()
2185 const token
* t
= peek ();
2186 if (t
&& t
->type
== tok_operator
2187 && (t
->content
== "+" ||
2188 t
->content
== "-" ||
2189 t
->content
== "!" ||
2190 t
->content
== "~" ||
2193 unary_expression
* e
= new unary_expression
;
2197 e
->operand
= parse_crement ();
2201 return parse_crement ();
// Parse the increment/decrement level.  A leading "++" or "--" yields a
// pre_crement whose operand comes from parse_value(); otherwise a value
// is parsed first and a following "++" or "--" yields a post_crement.
2206 parser::parse_crement () // as in "increment" / "decrement"
2208 // NB: Ideally, we'd parse only a symbol as an operand to the
2209 // *crement operators, instead of a general expression value. We'd
2210 // need more complex lookahead code to tell apart the postfix cases.
2211 // So we just punt, and leave it to pass-3 to signal errors on
2212 // cases like "4++".
2214 const token
* t
= peek ();
2215 if (t
&& t
->type
== tok_operator
2216 && (t
->content
== "++" || t
->content
== "--"))
2218 pre_crement
* e
= new pre_crement
;
2222 e
->operand
= parse_value ();
2226 // post-crement or non-crement
2227 expression
*op1
= parse_value ();
2230 if (t
&& t
->type
== tok_operator
2231 && (t
->content
== "++" || t
->content
== "--"))
2233 post_crement
* e
= new post_crement
;
// Parse a primary value, dispatching on the peeked token:
//   "("         -> a parenthesized expression (a missing ")" is an error)
//   identifier  -> parse_symbol()
//   otherwise   -> parse_literal()
// Throws parse_error ("expected value") on the guard at the top.
// NOTE(review): that guard's condition is not visible in this listing;
// presumably it fires when peek() returned no token -- confirm.
2246 parser::parse_value ()
2248 const token
* t
= peek ();
2250 throw parse_error ("expected value");
2252 if (t
->type
== tok_operator
&& t
->content
== "(")
2255 expression
* e
= parse_expression ();
2257 if (! (t
->type
== tok_operator
&& t
->content
== ")"))
2258 throw parse_error ("expected ')'");
2261 else if (t
->type
== tok_identifier
)
2262 return parse_symbol ();
2264 return parse_literal ();
// Read one identifier into `name`.  If it is "@hist_linear" or
// "@hist_log", the histogram operator is parsed through `hop`: htype is
// set from the name, the statistic operand is parsed with
// parse_expression(), and for @hist_linear three numbers are read with
// expect_number() and appended to hop->params.  Any other identifier is
// left in `name` for the caller to interpret.
2269 parser::parse_hist_op_or_bare_name (hist_op
*&hop
, string
&name
)
2272 const token
* t
= expect_ident (name
);
2273 if (name
== "@hist_linear" || name
== "@hist_log")
2276 if (name
== "@hist_linear")
2277 hop
->htype
= hist_linear
;
2278 else if (name
== "@hist_log")
2279 hop
->htype
= hist_log
;
2282 hop
->stat
= parse_expression ();
// Only the linear histogram takes extra numeric parameters.
2284 if (hop
->htype
== hist_linear
)
2286 for (size_t i
= 0; i
< 3; ++i
)
2289 expect_number (tnum
);
2290 hop
->params
.push_back (tnum
);
// Parse something that can be indexed or iterated over: a histogram
// operator or a bare name, as recognized by
// parse_hist_op_or_bare_name().  A symbol node is allocated for the
// bare-name case.
2300 parser::parse_indexable ()
2302 hist_op
*hop
= NULL
;
2304 const token
*tok
= parse_hist_op_or_bare_name(hop
, name
);
2309 symbol
* sym
= new symbol
;
// Parse an identifier-led value.  After reading a histogram operator or
// a bare name, the name selects one of these forms:
//   "@cast"           -> cast_op with operand, type string, optional
//                        module string and member / array components
//   other "@name"     -> stat_op (@count, @sum, @min, @max, ...)
//   print family      -> print_format, as recognized by
//                        print_format::parse_print()
//   "$name"           -> target_symbol with member / array components
//   name "(" ...      -> functioncall
//   anything else     -> a plain symbol, optionally followed by "[...]"
//                        array indexing
// A histogram operator that is not subsequently indexed is rejected,
// because on its own it does not count as an expression.
2317 // var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2319 parser::parse_symbol ()
2321 hist_op
*hop
= NULL
;
2324 const token
*t
= parse_hist_op_or_bare_name(hop
, name
);
2328 // If we didn't get a hist_op, then we did get an identifier. We can
2329 // now scrutinize this identifier for the various magic forms of identifier
2330 // (printf, @stat_op, and $var...)
// Output flags filled in by print_format::parse_print() below.
2332 bool pf_stream
, pf_format
, pf_delim
, pf_newline
, pf_char
;
2334 if (name
== "@cast")
2336 // type-punning time
2337 cast_op
*cop
= new cast_op
;
2339 cop
->base_name
= name
;
2341 cop
->operand
= parse_expression ();
2343 expect_unknown(tok_string
, cop
->type
);
2344 // types never start with "struct<space>" or "union<space>",
// Strip a leading "struct " (7 chars) or "union " (6 chars) prefix.
2346 if (cop
->type
.compare(0, 7, "struct ") == 0)
2347 cop
->type
= cop
->type
.substr(7);
2348 if (cop
->type
.compare(0, 6, "union ") == 0)
2349 cop
->type
= cop
->type
.substr(6);
2353 expect_unknown(tok_string
, cop
->module
);
// Trailing dereference chain: struct members and literal array indexes.
2362 expect_ident_or_keyword (c
);
2363 cop
->components
.push_back
2364 (make_pair (target_symbol::comp_struct_member
, c
));
2366 else if (peek_op ("["))
2369 expect_unknown (tok_number
, c
);
2371 cop
->components
.push_back
2372 (make_pair (target_symbol::comp_literal_array_index
, c
));
2377 // if there aren't any dereferences, then the cast is pointless
2378 if (cop
->components
.empty())
2380 expression
*op
= cop
->operand
;
2387 else if (name
.size() > 0 && name
[0] == '@')
// Statistic operator: map the operator name to its ctype code.
2389 stat_op
*sop
= new stat_op
;
2391 sop
->ctype
= sc_average
;
2392 else if (name
== "@count")
2393 sop
->ctype
= sc_count
;
2394 else if (name
== "@sum")
2395 sop
->ctype
= sc_sum
;
2396 else if (name
== "@min")
2397 sop
->ctype
= sc_min
;
2398 else if (name
== "@max")
2399 sop
->ctype
= sc_max
;
2401 throw parse_error("unknown statistic operator " + name
);
2404 sop
->stat
= parse_expression ();
2409 else if (print_format::parse_print(name
,
2410 pf_stream
, pf_format
, pf_delim
, pf_newline
, pf_char
))
// Print-family call: copy the recognized variant flags into the node.
2412 print_format
*fmt
= new print_format
;
2414 fmt
->print_to_stream
= pf_stream
;
2415 fmt
->print_with_format
= pf_format
;
2416 fmt
->print_with_delim
= pf_delim
;
2417 fmt
->print_with_newline
= pf_newline
;
2418 fmt
->print_char
= pf_char
;
2421 if ((name
== "print" || name
== "println") &&
2422 (peek_kw("@hist_linear") || peek_kw("@hist_log")))
2424 // We have a special case where we recognize
2425 // print(@hist_foo(bar)) as a magic print-the-histogram
2426 // construct. This is sort of gross but it avoids
2427 // promoting histogram references to typeful
2431 t
= parse_hist_op_or_bare_name(hop
, name
);
2434 // It is, sadly, possible that even while parsing a
2435 // hist_op, we *mis-guessed* and the user wishes to
2436 // print(@hist_op(foo)[bucket]), a scalar. In that case
2437 // we must parse the arrayindex and print an expression.
2443 // This is simplified version of the
2444 // multi-array-index parser below, because we can
2445 // only ever have one index on a histogram anyways.
2447 struct arrayindex
* ai
= new arrayindex
;
2450 ai
->indexes
.push_back (parse_expression ());
2452 fmt
->args
.push_back(ai
);
2458 if (fmt
->print_with_format
)
2460 // Consume and convert a format string. Agreement between the
2461 // format string and the arguments is postponed to the
2462 // typechecking phase.
2464 expect_unknown (tok_string
, tmp
);
2465 fmt
->raw_components
= tmp
;
2466 fmt
->components
= print_format::string_to_components (tmp
);
2468 else if (fmt
->print_with_delim
)
2470 // Consume a delimiter to separate arguments.
2471 fmt
->delimiter
.clear();
2472 fmt
->delimiter
.type
= print_format::conv_literal
;
2473 expect_unknown (tok_string
, fmt
->delimiter
.literal_string
);
2478 // If we are not printing with a format string, we must have
2479 // at least one argument (of any type).
2480 expression
*e
= parse_expression ();
2481 fmt
->args
.push_back(e
);
2484 // Consume any subsequent arguments.
2485 while (min_args
|| !peek_op (")"))
2488 expression
*e
= parse_expression ();
2489 fmt
->args
.push_back(e
);
2498 else if (name
.size() > 0 && name
[0] == '$')
2500 // target_symbol time
2501 target_symbol
*tsym
= new target_symbol
;
2503 tsym
->base_name
= name
;
// Trailing dereference chain, handled the same way as for @cast above.
2510 expect_ident_or_keyword (c
);
2511 tsym
->components
.push_back
2512 (make_pair (target_symbol::comp_struct_member
, c
));
2514 else if (peek_op ("["))
2517 expect_unknown (tok_number
, c
);
2519 tsym
->components
.push_back
2520 (make_pair (target_symbol::comp_literal_array_index
, c
));
2528 else if (peek_op ("(")) // function call
2531 struct functioncall
* f
= new functioncall
;
2534 // Allow empty actual parameter list
2542 f
->args
.push_back (parse_expression ());
2548 else if (peek_op (","))
2554 throw parse_error ("expected ',' or ')'");
2567 // By now, either we had a hist_op in the first place, or else
2568 // we had a plain word and it was converted to a symbol.
2570 assert (!hop
!= !sym
); // logical XOR
2572 // All that remains is to check for array indexing
2574 if (peek_op ("[")) // array
2577 struct arrayindex
* ai
= new arrayindex
;
2587 ai
->indexes
.push_back (parse_expression ());
2593 else if (peek_op (","))
2599 throw parse_error ("expected ',' or ']'");
2604 // If we got to here, we *should* have a symbol; if we have
2605 // a hist_op on its own, it doesn't count as an expression,
2606 // so we throw a parse error.
2609 throw parse_error("base histogram operator where expression expected", t
);
2614 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */