1 : // recursive descent parser for systemtap scripts
2 : // Copyright (C) 2005-2007 Red Hat Inc.
3 : // Copyright (C) 2006 Intel Corporation.
4 : // Copyright (C) 2007 Bull S.A.S
5 : //
6 : // This file is part of systemtap, and is free software. You can
7 : // redistribute it and/or modify it under the terms of the GNU General
8 : // Public License (GPL); either version 2, or (at your option) any
9 : // later version.
10 :
11 : #include "config.h"
12 : #include "staptree.h"
13 : #include "parse.h"
14 : #include "session.h"
15 : #include "util.h"
16 :
17 : #include <iostream>
18 : #include <fstream>
19 : #include <cctype>
20 : #include <cstdlib>
21 : #include <cassert>
22 : #include <cerrno>
23 : #include <climits>
24 : #include <sstream>
25 : #include <cstring>
26 : #include <cctype>
27 : extern "C" {
28 : #include <fnmatch.h>
29 : }
30 :
31 : using namespace std;
32 :
33 : // ------------------------------------------------------------------------
34 :
35 :
36 :
37 286 : parser::parser (systemtap_session& s, istream& i, bool p):
38 : session (s),
39 : input_name ("<input>"), free_input (0),
40 : input (i, input_name, s), privileged (p),
41 286 : context(con_unknown), last_t (0), next_t (0), num_errors (0)
42 286 : { }
43 :
44 41918 : parser::parser (systemtap_session& s, const string& fn, bool p):
45 : session (s),
46 : input_name (fn), free_input (new ifstream (input_name.c_str(), ios::in)),
47 : input (* free_input, input_name, s), privileged (p),
48 41918 : context(con_unknown), last_t (0), next_t (0), num_errors (0)
49 41918 : { }
50 :
51 42204 : parser::~parser()
52 : {
53 42204 : if (free_input) delete free_input;
54 42204 : }
55 :
56 :
57 : stapfile*
58 286 : parser::parse (systemtap_session& s, std::istream& i, bool pr)
59 : {
60 286 : parser p (s, i, pr);
61 286 : return p.parse ();
62 : }
63 :
64 :
65 : stapfile*
66 41918 : parser::parse (systemtap_session& s, const std::string& n, bool pr)
67 : {
68 41918 : parser p (s, n, pr);
69 41918 : return p.parse ();
70 : }
71 :
72 : static string
73 528 : tt2str(token_type tt)
74 : {
75 528 : switch (tt)
76 : {
77 1 : case tok_junk: return "junk";
78 353 : case tok_identifier: return "identifier";
79 136 : case tok_operator: return "operator";
80 7 : case tok_string: return "string";
81 17 : case tok_number: return "number";
82 3 : case tok_embedded: return "embedded-code";
83 8 : case tok_keyword: return "keyword";
84 : }
85 3 : return "unknown token";
86 : }
87 :
88 : ostream&
89 1209 : operator << (ostream& o, const source_loc& loc)
90 : {
91 : o << loc.file << ":"
92 : << loc.line << ":"
93 1209 : << loc.column;
94 :
95 1209 : return o;
96 : }
97 :
98 : ostream&
99 525 : operator << (ostream& o, const token& t)
100 : {
101 525 : o << tt2str(t.type);
102 :
103 1047 : if (t.type != tok_embedded && t.type != tok_keyword) // XXX: other types?
104 : {
105 514 : o << " '";
106 2420 : for (unsigned i=0; i<t.content.length(); i++)
107 : {
108 1906 : char c = t.content[i];
109 1906 : o << (isprint (c) ? c : '?');
110 : }
111 514 : o << "'";
112 : }
113 :
114 : o << " at "
115 525 : << t.location;
116 :
117 525 : return o;
118 : }
119 :
120 :
121 : void
122 121 : parser::print_error (const parse_error &pe)
123 : {
124 121 : cerr << "parse error: " << pe.what () << endl;
125 :
126 121 : if (pe.tok)
127 : {
128 14 : cerr << "\tat: " << *pe.tok << endl;
129 : }
130 : else
131 : {
132 107 : const token* t = last_t;
133 107 : if (t)
134 80 : cerr << "\tsaw: " << *t << endl;
135 : else
136 27 : cerr << "\tsaw: " << input_name << " EOF" << endl;
137 : }
138 :
139 : // XXX: make it possible to print the last input line,
140 : // so as to line up an arrow with the specific error column
141 :
142 121 : num_errors ++;
143 121 : }
144 :
145 :
146 : const token*
147 0 : parser::last ()
148 : {
149 0 : return last_t;
150 : }
151 :
152 :
153 : // Here, we perform on-the-fly preprocessing.
154 : // The basic form is %( CONDITION %? THEN-TOKENS %: ELSE-TOKENS %)
155 : // where CONDITION is: kernel_v[r] COMPARISON-OP "version-string"
156 : // or: arch COMPARISON-OP "arch-string"
157 : // or: "string1" COMPARISON-OP "string2"
158 : // or: number1 COMPARISON-OP number2
159 : // The %: ELSE-TOKENS part is optional.
160 : //
161 : // e.g. %( kernel_v > "2.5" %? "foo" %: "baz" %)
162 : // e.g. %( arch != "i686" %? "foo" %: "baz" %)
163 : //
164 : // Up to an entire %( ... %) expression is processed by a single call
165 : // to this function. Tokens included by any nested conditions are
166 : // enqueued in a private vector.
167 :
168 : bool eval_pp_conditional (systemtap_session& s,
169 72568 : const token* l, const token* op, const token* r)
170 : {
171 72568 : if (l->type == tok_identifier && (l->content == "kernel_v" ||
172 : l->content == "kernel_vr"))
173 : {
174 62798 : string target_kernel_vr = s.kernel_release;
175 62798 : string target_kernel_v = s.kernel_base_release;
176 :
177 62798 : if (! (r->type == tok_string))
178 1 : throw parse_error ("expected string literal", r);
179 :
180 : string target = (l->content == "kernel_vr" ?
181 : target_kernel_vr.c_str() :
182 62797 : target_kernel_v.c_str());
183 62797 : string query = r->content;
184 62797 : bool rhs_wildcard = (strpbrk (query.c_str(), "*?[") != 0);
185 :
186 : // collect acceptable strverscmp results.
187 : int rvc_ok1, rvc_ok2;
188 62797 : bool wc_ok = false;
189 62797 : if (op->type == tok_operator && op->content == "<=")
190 1086 : { rvc_ok1 = -1; rvc_ok2 = 0; }
191 61711 : else if (op->type == tok_operator && op->content == ">=")
192 51943 : { rvc_ok1 = 1; rvc_ok2 = 0; }
193 9768 : else if (op->type == tok_operator && op->content == "<")
194 7581 : { rvc_ok1 = -1; rvc_ok2 = -1; }
195 2187 : else if (op->type == tok_operator && op->content == ">")
196 1086 : { rvc_ok1 = 1; rvc_ok2 = 1; }
197 1101 : else if (op->type == tok_operator && op->content == "==")
198 1092 : { rvc_ok1 = 0; rvc_ok2 = 0; wc_ok = true; }
199 9 : else if (op->type == tok_operator && op->content == "!=")
200 8 : { rvc_ok1 = -1; rvc_ok2 = 1; wc_ok = true; }
201 : else
202 1 : throw parse_error ("expected comparison operator", op);
203 :
204 62796 : if ((!wc_ok) && rhs_wildcard)
205 0 : throw parse_error ("wildcard not allowed with order comparison operators", op);
206 :
207 62796 : if (rhs_wildcard)
208 : {
209 : int rvc_result = fnmatch (query.c_str(), target.c_str(),
210 1090 : FNM_NOESCAPE); // spooky
211 1090 : bool badness = (rvc_result == 0) ^ (op->content == "==");
212 63886 : return !badness;
213 : }
214 : else
215 : {
216 61706 : int rvc_result = strverscmp (target.c_str(), query.c_str());
217 : // normalize rvc_result
218 61706 : if (rvc_result < 0) rvc_result = -1;
219 61706 : if (rvc_result > 0) rvc_result = 1;
220 61706 : return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
221 1 : }
222 : }
223 9770 : else if (l->type == tok_identifier && l->content == "arch")
224 : {
225 9758 : string target_architecture = s.architecture;
226 9758 : if (! (r->type == tok_string))
227 1 : throw parse_error ("expected string literal", r);
228 9757 : string query_architecture = r->content;
229 :
230 : int nomatch = fnmatch (query_architecture.c_str(),
231 : target_architecture.c_str(),
232 9757 : FNM_NOESCAPE); // still spooky
233 :
234 : bool result;
235 9757 : if (op->type == tok_operator && op->content == "==")
236 4339 : result = !nomatch;
237 5418 : else if (op->type == tok_operator && op->content == "!=")
238 5417 : result = nomatch;
239 : else
240 1 : throw parse_error ("expected '==' or '!='", op);
241 :
242 9756 : return result;
243 : }
244 12 : else if ((l->type == tok_string && r->type == tok_string)
245 : || (l->type == tok_number && r->type == tok_number))
246 : {
247 : // collect acceptable strverscmp results.
248 : int rvc_ok1, rvc_ok2;
249 9 : if (op->type == tok_operator && op->content == "<=")
250 0 : { rvc_ok1 = -1; rvc_ok2 = 0; }
251 9 : else if (op->type == tok_operator && op->content == ">=")
252 2 : { rvc_ok1 = 1; rvc_ok2 = 0; }
253 7 : else if (op->type == tok_operator && op->content == "<")
254 3 : { rvc_ok1 = -1; rvc_ok2 = -1; }
255 4 : else if (op->type == tok_operator && op->content == ">")
256 0 : { rvc_ok1 = 1; rvc_ok2 = 1; }
257 4 : else if (op->type == tok_operator && op->content == "==")
258 2 : { rvc_ok1 = 0; rvc_ok2 = 0; }
259 2 : else if (op->type == tok_operator && op->content == "!=")
260 2 : { rvc_ok1 = -1; rvc_ok2 = 1; }
261 : else
262 0 : throw parse_error ("expected comparison operator", op);
263 :
264 9 : int rvc_result = l->content.compare(r->content);
265 :
266 : // normalize rvc_result
267 9 : if (rvc_result < 0) rvc_result = -1;
268 9 : if (rvc_result > 0) rvc_result = 1;
269 :
270 : // NB: no wildcarding option here
271 :
272 9 : return (rvc_result == rvc_ok1 || rvc_result == rvc_ok2);
273 : }
274 3 : else if (l->type == tok_string && r->type == tok_number
275 : && op->type == tok_operator)
276 1 : throw parse_error ("expected string literal as right value", r);
277 2 : else if (l->type == tok_number && r->type == tok_string
278 : && op->type == tok_operator)
279 1 : throw parse_error ("expected number literal as right value", r);
280 : // XXX: support other forms? "CONFIG_SMP" ?
281 : else
282 : throw parse_error ("expected 'arch' or 'kernel_v' or 'kernel_vr'\n"
283 1 : " or comparison between strings or integers", l);
284 : }
285 :
286 :
// Return the next token after preprocessing, or 0 at end of input.
// Tokens retained from an earlier conditional (enqueued_pp) are handed
// out first.  Otherwise a token is read from the lexer; an ordinary
// token is returned as is, while a "%(" token causes the whole
// %( CONDITION %? THEN %: ELSE %) construct to be consumed, its selected
// tokens to be appended to enqueued_pp, and the loop to start over.
// Throws parse_error (located at the "%(" token) for a malformed
// conditional; eval_pp_conditional may throw as well.
287 : // expand_args is used to know if we must expand $x and @x identifiers.
288 : // Only tokens corresponding to the TRUE statement must be expanded
289 : const token*
290 60846314 : parser::scan_pp (bool wildcard, bool expand_args)
291 : {
292 75806 : while (true)
293 : {
// Hand out previously retained tokens in FIFO order before reading more.
294 60846314 : if (enqueued_pp.size() > 0)
295 : {
296 1389485 : const token* t = enqueued_pp[0];
297 1389485 : enqueued_pp.erase (enqueued_pp.begin());
298 1389485 : return t;
299 : }
300 :
301 59456829 : const token* t = input.scan (wildcard, expand_args); // NB: not recursive!
302 59456825 : if (t == 0) // EOF
303 43403 : return t;
304 :
305 59413422 : if (! (t->type == tok_operator && t->content == "%(")) // ordinary token
306 59337604 : return t;
307 :
308 : // We have a %( - it's time to throw a preprocessing party!
309 :
// The condition is exactly three raw tokens: left operand, operator,
// right operand.  They are scanned without wildcard identifiers.
310 : const token *l, *op, *r;
311 75818 : l = input.scan (false, expand_args); // NB: not recursive, though perhaps could be
312 75818 : op = input.scan (false, expand_args);
313 75818 : r = input.scan (false, expand_args);
314 75818 : if (l == 0 || op == 0 || r == 0)
315 1 : throw parse_error ("incomplete condition after '%('", t);
316 : // NB: consider generalizing to consume all tokens until %?, and
317 : // passing that as a vector to an evaluator.
318 :
319 : // Do not evaluate the condition if we haven't expanded everything.
320 : // This may occur when conditionals are nested inside one another.
// (When expand_args is false the && short-circuits and result is false.)
321 75817 : bool result = expand_args && eval_pp_conditional (session, l, op, r);
322 75810 : delete l;
323 75810 : delete op;
324 75810 : delete r;
325 :
326 75810 : const token *m = input.scan (); // NB: not recursive
327 75810 : if (! (m && m->type == tok_operator && m->content == "%?"))
328 1 : throw parse_error ("expected '%?' marker for conditional", t);
329 75809 : delete m; // "%?"
330 :
// Tokens selected by this conditional are collected locally and only
// moved to enqueued_pp once the closing "%)" has been seen.
331 75809 : vector<const token*> my_enqueued_pp;
332 75809 : bool have_token = false;
333 :
334 1480460 : while (true) // consume THEN tokens
335 : {
// $x/@x expansion is requested for THEN tokens only when the condition held.
336 1556269 : m = scan_pp (wildcard, result); // NB: recursive
337 1556268 : if (m == 0)
338 : throw parse_error (have_token ?
339 : "incomplete conditional - missing %: or %)" :
340 : "missing THEN tokens for conditional",
341 1 : t);
342 :
343 1556267 : have_token = true;
344 1556267 : if (m->type == tok_operator && (m->content == "%:" || // ELSE
345 : m->content == "%)")) // END
346 75807 : break;
347 : // enqueue token
348 1480460 : if (result)
349 992353 : my_enqueued_pp.push_back (m);
350 : else
351 488107 : delete m; // unused token
352 : // continue
353 : }
354 :
355 75807 : have_token = false;
356 75807 : if (m && m->type == tok_operator && m->content == "%:") // ELSE
357 : {
358 56323 : delete m; // "%:"
359 909070 : while (true)
360 : {
// ELSE tokens are expanded only when the condition was evaluated and failed.
361 965393 : m = scan_pp (wildcard, expand_args && !result); // NB: recursive
362 965393 : if (m == 0)
363 : throw parse_error (have_token ?
364 : "incomplete conditional - missing %)" :
365 : "missing ELSE tokens for conditional",
366 1 : t);
367 :
368 965392 : have_token = true;
369 965392 : if (m->type == tok_operator && m->content == "%)") // END
370 56322 : break;
371 : // enqueue token
372 909070 : if (! result)
373 397136 : my_enqueued_pp.push_back (m);
374 : else
375 511934 : delete m; // unused token
376 : // continue
377 : }
378 : }
379 75806 : delete t; // "%("
380 75806 : delete m; // "%)"
381 :
382 : // NB: we transcribe the retained tokens here, and not inside
383 : // the THEN/ELSE while loops. If it were done there, each loop
384 : // would become infinite (each iteration consuming an ordinary
385 : // token the previous one just pushed there). Guess how I
386 : // figured that out.
387 : enqueued_pp.insert (enqueued_pp.end(),
388 : my_enqueued_pp.begin(),
389 75806 : my_enqueued_pp.end());
390 :
391 : // Go back to outermost while(true) loop. We hope that at least
392 : // some THEN or ELSE tokens were enqueued. If not, around we go
393 : // again, until EOF.
394 : }
395 : }
396 :
397 :
398 : const token*
399 58205431 : parser::next (bool wildcard)
400 : {
401 58205431 : if (! next_t)
402 16070841 : next_t = scan_pp (wildcard);
403 58205431 : if (! next_t)
404 1 : throw parse_error ("unexpected end-of-file");
405 :
406 58205430 : last_t = next_t;
407 : // advance by zeroing next_t
408 58205430 : next_t = 0;
409 58205430 : return last_t;
410 : }
411 :
412 :
413 : const token*
414 314935515 : parser::peek (bool wildcard)
415 : {
416 314935515 : if (! next_t)
417 42178005 : next_t = scan_pp (wildcard);
418 :
419 : // don't advance by zeroing next_t
420 314935500 : last_t = next_t;
421 314935500 : return next_t;
422 : }
423 :
424 :
425 : static inline bool
426 25666652 : tok_is(token const * t, token_type tt, string const & expected)
427 : {
428 25666652 : return t && t->type == tt && t->content == expected;
429 : }
430 :
431 :
432 : const token*
433 2548468 : parser::expect_known (token_type tt, string const & expected)
434 : {
435 2548468 : const token *t = next();
436 2548468 : if (! (t && t->type == tt && t->content == expected))
437 2 : throw parse_error ("expected '" + expected + "'");
438 2548466 : return t;
439 : }
440 :
441 :
442 : const token*
443 11586121 : parser::expect_unknown (token_type tt, string & target)
444 : {
445 11586121 : const token *t = next();
446 11586121 : if (!(t && t->type == tt))
447 3 : throw parse_error ("expected " + tt2str(tt));
448 11586118 : target = t->content;
449 11586118 : return t;
450 : }
451 :
452 :
453 : const token*
454 618960 : parser::expect_unknown2 (token_type tt1, token_type tt2, string & target)
455 : {
456 618960 : const token *t = next();
457 618960 : if (!(t && (t->type == tt1 || t->type == tt2)))
458 0 : throw parse_error ("expected " + tt2str(tt1) + " or " + tt2str(tt2));
459 618960 : target = t->content;
460 618960 : return t;
461 : }
462 :
463 :
464 : const token*
465 2536553 : parser::expect_op (std::string const & expected)
466 : {
467 2536553 : return expect_known (tok_operator, expected);
468 : }
469 :
470 :
471 : const token*
472 0 : parser::expect_kw (std::string const & expected)
473 : {
474 0 : return expect_known (tok_identifier, expected);
475 : }
476 :
477 : const token*
478 12088 : parser::expect_number (int64_t & value)
479 : {
480 12088 : bool neg = false;
481 12088 : const token *t = next();
482 12088 : if (t->type == tok_operator && t->content == "-")
483 : {
484 12 : neg = true;
485 12 : t = next ();
486 : }
487 12088 : if (!(t && t->type == tok_number))
488 0 : throw parse_error ("expected number");
489 :
490 12088 : const char* startp = t->content.c_str ();
491 12088 : char* endp = (char*) startp;
492 :
493 : // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
494 : // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
495 : // since the lexer only gives us positive digit strings, but we'll
496 : // limit it to LLONG_MIN when a '-' operator is fed into the literal.
497 12088 : errno = 0;
498 12088 : value = (int64_t) strtoull (startp, & endp, 0);
499 12088 : if (errno == ERANGE || errno == EINVAL || *endp != '\0'
500 : || (neg && (unsigned long long) value > 9223372036854775808ULL)
501 : || (unsigned long long) value > 18446744073709551615ULL
502 : || value < -9223372036854775807LL-1)
503 0 : throw parse_error ("number invalid or out of range");
504 :
505 12088 : if (neg)
506 12 : value = -value;
507 :
508 12088 : return t;
509 : }
510 :
511 :
512 : const token*
513 11016668 : parser::expect_ident (std::string & target)
514 : {
515 11016668 : return expect_unknown (tok_identifier, target);
516 : }
517 :
518 :
519 : const token*
520 618960 : parser::expect_ident_or_keyword (std::string & target)
521 : {
522 618960 : return expect_unknown2 (tok_identifier, tok_keyword, target);
523 : }
524 :
525 :
526 : bool
527 25659624 : parser::peek_op (std::string const & op)
528 : {
529 25659624 : return tok_is (peek(), tok_operator, op);
530 : }
531 :
532 :
533 : bool
534 1425 : parser::peek_kw (std::string const & kw)
535 : {
536 1425 : return tok_is (peek(), tok_identifier, kw);
537 : }
538 :
539 :
540 :
541 42204 : lexer::lexer (istream& i, const string& in, systemtap_session& s):
542 : input (i), input_name (in), cursor_suspend_count(0),
543 42204 : cursor_line (1), cursor_column (1), session(s)
544 42204 : { }
545 :
546 :
547 : int
548 850291701 : lexer::input_peek (unsigned n)
549 : {
550 2238581382 : while (lookahead.size() <= n)
551 : {
552 537997980 : int c = input.get ();
553 537997980 : lookahead.push_back (input ? c : -1);
554 : }
555 850291701 : return lookahead[n];
556 : }
557 :
558 :
559 : int
560 537956299 : lexer::input_get ()
561 : {
562 537956299 : int c = input_peek (0);
563 537956299 : lookahead.erase (lookahead.begin ());
564 :
565 537956299 : if (c < 0) return c; // EOF
566 :
567 537912893 : if (cursor_suspend_count)
568 : // Track effect of input_put: preserve previous cursor/line_column
569 : // until all of its characters are consumed.
570 523 : cursor_suspend_count --;
571 : else
572 : {
573 : // update source cursor
574 537912370 : if (c == '\n')
575 : {
576 19475338 : cursor_line ++;
577 19475338 : cursor_column = 1;
578 : }
579 : else
580 518437032 : cursor_column ++;
581 : }
582 :
583 537912893 : return c;
584 : }
585 :
586 :
587 : void
588 62 : lexer::input_put (const string& chars)
589 : {
590 : // clog << "[put:" << chars << "]";
591 585 : for (int i=chars.size()-1; i>=0; i--)
592 : {
593 523 : int c = chars[i];
594 523 : lookahead.insert (lookahead.begin(), c);
595 523 : cursor_suspend_count ++;
596 : }
597 62 : }
598 :
599 :
// Scan and return the next raw token from the input, or 0 at end of
// input.  Whitespace and the three comment forms (#, //, /* */) are
// skipped.  When WILDCARD is set, '*' is accepted as an identifier
// character.  When EXPAND_ARGS is set, $N/@N and $#/@# at the start of a
// token are replaced by command line argument text (raw for '$', quoted
// through lex_cast_qstring for '@') and scanning resumes on that text.
// The returned token is allocated with new.  Throws parse_error for an
// invalid argument index or for a nested argument substitution.
600 : token*
601 59760093 : lexer::scan (bool wildcard, bool expand_args)
602 : {
603 59760093 : token* n = new token;
604 59760093 : n->location.file = input_name;
605 :
// Counts argument substitutions performed while scanning this token.
606 59760093 : unsigned semiskipped_p = 0;
607 :
// skip: (re)start a token and record the current cursor as its location.
608 114648673 : skip:
609 114648673 : n->location.line = cursor_line;
610 114648673 : n->location.column = cursor_column;
611 :
// semiskip: rescan after a substitution, keeping the location set above.
612 114648735 : semiskip:
613 114648735 : if (semiskipped_p > 1)
614 : {
615 1 : input_get ();
616 1 : throw parse_error ("invalid nested substitution of command line arguments");
617 : }
618 :
619 114648734 : int c = input_get();
620 114648734 : int c2 = input_peek ();
621 : // clog << "{" << (char)c << (char)c2 << "}";
622 114648734 : if (c < 0)
623 : {
624 43404 : delete n;
625 43404 : return 0;
626 : }
627 :
628 114605330 : if (isspace (c))
629 52365389 : goto skip;
630 :
631 : // Paste command line arguments as character streams into
632 : // the beginning of a token. $1..$999 go through as raw
633 : // characters; @1..@999 are quoted/escaped as strings.
634 : // $# and @# expand to the number of arguments, similarly
635 : // raw or quoted.
636 62239941 : if (expand_args &&
637 : (c == '$' || c == '@') &&
638 : (c2 == '#'))
639 : {
640 11 : input_get(); // swallow '#'
641 11 : stringstream converter;
642 11 : converter << session.args.size ();
643 11 : if (c == '$') input_put (converter.str());
644 5 : else input_put (lex_cast_qstring (converter.str()));
645 11 : semiskipped_p ++;
646 11 : goto semiskip;
647 : }
648 62239930 : else if (expand_args &&
649 : (c == '$' || c == '@') &&
650 : (isdigit (c2)))
651 : {
// Accumulate the decimal argument index that follows the '$' or '@'.
652 54 : unsigned idx = 0;
653 54 : do
654 : {
655 54 : input_get ();
656 54 : idx = (idx * 10) + (c2 - '0');
657 54 : c2 = input_peek ();
658 : } while (c2 > 0 &&
659 : isdigit (c2) &&
660 : idx <= session.args.size()); // prevent overflow
// Argument indices are 1-based.
661 54 : if (idx == 0 ||
662 : idx-1 >= session.args.size())
663 3 : throw parse_error ("command line argument index invalid or out of range", n);
664 :
665 51 : string arg = session.args[idx-1];
666 51 : if (c == '$') input_put (arg);
667 8 : else input_put (lex_cast_qstring (arg));
668 51 : semiskipped_p ++;
669 51 : goto semiskip;
670 : }
671 :
672 62239876 : else if (isalpha (c) || c == '$' || c == '@' || c == '_' ||
673 : (wildcard && c == '*'))
674 : {
675 23787682 : n->type = tok_identifier;
676 23787682 : n->content = (char) c;
677 176926418 : while (isalnum (c2) || c2 == '_' || c2 == '$' ||
678 : (wildcard && c2 == '*'))
679 : {
680 129351054 : input_get ();
681 129351054 : n->content.push_back (c2);
682 129351054 : c2 = input_peek ();
683 : }
684 :
// Reserved words are retagged from tok_identifier to tok_keyword.
685 23787682 : if (n->content == "probe"
686 : || n->content == "global"
687 : || n->content == "function"
688 : || n->content == "if"
689 : || n->content == "else"
690 : || n->content == "for"
691 : || n->content == "foreach"
692 : || n->content == "in"
693 : || n->content == "limit"
694 : || n->content == "return"
695 : || n->content == "delete"
696 : || n->content == "while"
697 : || n->content == "break"
698 : || n->content == "continue"
699 : || n->content == "next"
700 : || n->content == "string"
701 : || n->content == "long")
702 6037750 : n->type = tok_keyword;
703 :
704 23787682 : return n;
705 : }
706 :
707 38452194 : else if (isdigit (c)) // positive literal
708 : {
709 1675953 : n->type = tok_number;
710 1675953 : n->content = (char) c;
711 :
712 1404370 : while (1)
713 : {
714 3080323 : int c2 = input_peek ();
715 3080323 : if (c2 < 0)
716 1 : break;
717 :
718 : // NB: isalnum is very permissive. We rely on strtol, called in
719 : // parser::parse_literal below, to confirm that the number string
720 : // is correctly formatted and in range.
721 :
722 3080322 : if (isalnum (c2))
723 : {
724 1404370 : n->content.push_back (c2);
725 1404370 : input_get ();
726 : }
727 : else
728 1675952 : break;
729 : }
730 1675953 : return n;
731 : }
732 :
733 36776241 : else if (c == '\"')
734 : {
735 4346647 : n->type = tok_string;
736 47633707 : while (1)
737 : {
738 51980354 : c = input_get ();
739 :
// EOF or a newline before the closing quote: report the token as junk.
740 51980354 : if (c < 0 || c == '\n')
741 : {
742 0 : n->type = tok_junk;
743 0 : break;
744 : }
745 51980354 : if (c == '\"') // closing double-quotes
746 4346647 : break;
747 47633707 : else if (c == '\\') // see also input_put
748 : {
749 4814 : c = input_get ();
750 4814 : switch (c)
751 : {
752 : case 'a':
753 : case 'b':
754 : case 't':
755 : case 'n':
756 : case 'v':
757 : case 'f':
758 : case 'r':
759 : case '0' ... '7': // NB: need only match the first digit
760 : case '\\':
761 : // Pass these escapes through to the string value
762 : // being parsed; it will be emitted into a C literal.
763 :
764 4769 : n->content.push_back('\\');
765 :
766 : // fall through
// Any other escaped character is stored without its backslash.
767 : default:
768 4814 : n->content.push_back(c);
769 : break;
770 : }
771 : }
772 : else
773 47628893 : n->content.push_back(c);
774 : }
775 4346647 : return n;
776 : }
777 :
778 32429594 : else if (ispunct (c))
779 : {
// s1, s2, s3: the one-, two- and three-character candidate operators
// starting at c (shorter when the input ends first).
780 32429594 : int c2 = input_peek ();
781 32429594 : int c3 = input_peek (1);
782 32429594 : string s1 = string("") + (char) c;
783 64859137 : string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
784 32429594 : string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
785 :
786 : // NB: if we were to recognize negative numeric literals here,
787 : // we'd introduce another grammar ambiguity:
788 : // 1-1 would be parsed as tok_number(1) and tok_number(-1)
789 : // instead of tok_number(1) tok_operator('-') tok_number(1)
790 :
791 32429594 : if (s1 == "#") // shell comment
792 : {
// Consume characters until the cursor moves to another line, or EOF.
793 1754798 : unsigned this_line = cursor_line;
794 62550000 : do { c = input_get (); }
795 : while (c >= 0 && cursor_line == this_line);
796 2523191 : goto skip;
797 : }
798 30674796 : else if (s2 == "//") // C++ comment
799 : {
800 476156 : unsigned this_line = cursor_line;
801 16254805 : do { c = input_get (); }
802 : while (c >= 0 && cursor_line == this_line);
803 : goto skip;
804 : }
805 30198640 : else if (c == '/' && c2 == '*') // C comment
806 : {
807 292237 : c2 = input_get ();
808 292237 : unsigned chars = 0;
809 74862165 : while (c2 >= 0)
810 : {
811 74569927 : chars ++; // track this to prevent "/*/" from being accepted
812 74569927 : c = c2;
813 74569927 : c2 = input_get ();
814 74569927 : if (chars > 1 && c == '*' && c2 == '/')
815 292236 : break;
816 : }
817 : goto skip;
818 : }
819 29906403 : else if (c == '%' && c2 == '{') // embedded code
820 : {
821 255390 : n->type = tok_embedded;
822 255390 : (void) input_get (); // swallow '{' already in c2
823 84593849 : while (true)
824 : {
825 84849239 : c = input_get ();
826 84849239 : if (c < 0) // EOF
827 : {
828 1 : n->type = tok_junk;
829 1 : break;
830 : }
831 84849238 : if (c == '%')
832 : {
833 396049 : c2 = input_peek ();
834 396049 : if (c2 == '}')
835 : {
836 255389 : (void) input_get (); // swallow '}' too
837 255389 : break;
838 : }
839 : }
840 84593849 : n->content += c;
841 : }
842 30161793 : return n;
843 : }
844 :
845 : // We're committed to recognizing at least the first character
846 : // as an operator.
847 29651013 : n->type = tok_operator;
848 :
849 : // match all valid operators, in decreasing size order
850 29651013 : if (s3 == "<<<" ||
851 : s3 == "<<=" ||
852 : s3 == ">>=")
853 : {
854 390 : n->content = s3;
855 390 : input_get (); input_get (); // swallow other two characters
856 : }
857 29650623 : else if (s2 == "==" ||
858 : s2 == "!=" ||
859 : s2 == "<=" ||
860 : s2 == ">=" ||
861 : s2 == "+=" ||
862 : s2 == "-=" ||
863 : s2 == "*=" ||
864 : s2 == "/=" ||
865 : s2 == "%=" ||
866 : s2 == "&=" ||
867 : s2 == "^=" ||
868 : s2 == "|=" ||
869 : s2 == ".=" ||
870 : s2 == "&&" ||
871 : s2 == "||" ||
872 : s2 == "++" ||
873 : s2 == "--" ||
874 : s2 == "->" ||
875 : s2 == "<<" ||
876 : s2 == ">>" ||
877 : // preprocessor tokens
878 : s2 == "%(" ||
879 : s2 == "%?" ||
880 : s2 == "%:" ||
881 : s2 == "%)")
882 : {
883 1539140 : n->content = s2;
884 1539140 : input_get (); // swallow other character
885 : }
886 : else
887 : {
888 28111483 : n->content = s1;
889 : }
890 :
891 29651013 : return n;
892 : }
893 :
894 : else
895 : {
// Anything else is returned as a one-character junk token.
896 0 : n->type = tok_junk;
897 0 : n->content = (char) c;
898 0 : return n;
899 : }
900 : }
901 :
902 :
903 : // ------------------------------------------------------------------------
904 :
905 : stapfile*
906 42204 : parser::parse ()
907 : {
908 42204 : stapfile* f = new stapfile;
909 42204 : f->name = input_name;
910 :
911 42204 : bool empty = true;
912 :
913 1504777 : while (1)
914 : {
915 : try
916 : {
917 1546981 : const token* t = peek ();
918 1546968 : if (! t) // nice clean EOF
919 42204 : break;
920 :
921 1504764 : empty = false;
922 1504764 : if (t->type == tok_keyword && t->content == "probe")
923 : {
924 1157728 : context = con_probe;
925 1157728 : parse_probe (f->probes, f->aliases);
926 : }
927 347036 : else if (t->type == tok_keyword && t->content == "global")
928 : {
929 15684 : context = con_global;
930 15684 : parse_global (f->globals, f->probes);
931 : }
932 331352 : else if (t->type == tok_keyword && t->content == "function")
933 : {
934 298877 : context = con_function;
935 298877 : parse_functiondecl (f->functions);
936 : }
937 32475 : else if (t->type == tok_embedded)
938 : {
939 32465 : context = con_embedded;
940 32465 : f->embeds.push_back (parse_embeddedcode ());
941 : }
942 : else
943 : {
944 10 : context = con_unknown;
945 10 : throw parse_error ("expected 'probe', 'global', 'function', or '%{'");
946 : }
947 : }
948 114 : catch (parse_error& pe)
949 : {
950 57 : print_error (pe);
951 57 : if (pe.skip_some) // for recovery
952 : try
953 : {
954 : // Quietly swallow all tokens until the next '}'.
955 127 : while (1)
956 : {
957 182 : const token* t = peek ();
958 182 : if (! t)
959 30 : break;
960 152 : next ();
961 152 : if (t->type == tok_operator && t->content == "}")
962 25 : break;
963 : }
964 : }
965 0 : catch (parse_error& pe2)
966 : {
967 : // parse error during recovery ... ugh
968 0 : print_error (pe2);
969 : }
970 : }
971 : }
972 :
973 42204 : if (empty)
974 : {
975 6 : cerr << "Input file '" << input_name << "' is empty or missing." << endl;
976 6 : delete f;
977 6 : return 0;
978 : }
979 42198 : else if (num_errors > 0)
980 : {
981 74 : cerr << num_errors << " parse error(s)." << endl;
982 74 : delete f;
983 74 : return 0;
984 : }
985 :
986 42124 : return f;
987 : }
988 :
989 :
990 : void
991 : parser::parse_probe (std::vector<probe *> & probe_ret,
992 1157728 : std::vector<probe_alias *> & alias_ret)
993 : {
994 1157728 : const token* t0 = next ();
995 1157728 : if (! (t0->type == tok_keyword && t0->content == "probe"))
996 0 : throw parse_error ("expected 'probe'");
997 :
998 1157728 : vector<probe_point *> aliases;
999 2315456 : vector<probe_point *> locations;
1000 :
1001 1157728 : bool equals_ok = true;
1002 :
1003 1157728 : int epilogue_alias = 0;
1004 :
1005 1825707 : while (1)
1006 : {
1007 2983435 : probe_point * pp = parse_probe_point ();
1008 :
1009 2983417 : const token* t = peek ();
1010 2983417 : if (equals_ok && t
1011 : && t->type == tok_operator && t->content == "=")
1012 : {
1013 1150293 : aliases.push_back(pp);
1014 1150293 : next ();
1015 1150293 : continue;
1016 : }
1017 1833124 : else if (equals_ok && t
1018 : && t->type == tok_operator && t->content == "+=")
1019 : {
1020 2 : aliases.push_back(pp);
1021 2 : epilogue_alias = 1;
1022 2 : next ();
1023 2 : continue;
1024 : }
1025 1833122 : else if (t && t->type == tok_operator && t->content == ",")
1026 : {
1027 675412 : locations.push_back(pp);
1028 675412 : equals_ok = false;
1029 675412 : next ();
1030 675412 : continue;
1031 : }
1032 1157710 : else if (t && t->type == tok_operator && t->content == "{")
1033 : {
1034 1157710 : locations.push_back(pp);
1035 : break;
1036 : }
1037 : else
1038 0 : throw parse_error ("expected probe point specifier");
1039 : }
1040 :
1041 2315420 : if (aliases.empty())
1042 : {
1043 7415 : probe* p = new probe;
1044 7415 : p->tok = t0;
1045 7415 : p->locations = locations;
1046 7415 : p->body = parse_stmt_block ();
1047 7415 : p->privileged = privileged;
1048 7415 : probe_ret.push_back (p);
1049 : }
1050 : else
1051 : {
1052 1150295 : probe_alias* p = new probe_alias (aliases);
1053 1150295 : if(epilogue_alias)
1054 2 : p->epilogue_style = true;
1055 : else
1056 1150293 : p->epilogue_style = false;
1057 1150295 : p->tok = t0;
1058 1150295 : p->locations = locations;
1059 1150295 : p->body = parse_stmt_block ();
1060 1150295 : p->privileged = privileged;
1061 1150295 : alias_ret.push_back (p);
1062 1157728 : }
1063 1157710 : }
1064 :
1065 :
1066 : embeddedcode*
1067 252140 : parser::parse_embeddedcode ()
1068 : {
1069 252140 : embeddedcode* e = new embeddedcode;
1070 252140 : const token* t = next ();
1071 252140 : if (t->type != tok_embedded)
1072 0 : throw parse_error ("expected '%{'");
1073 :
1074 252140 : if (! privileged)
1075 : throw parse_error ("embedded code in unprivileged script",
1076 2 : false /* don't skip tokens for parse resumption */);
1077 :
1078 252138 : e->tok = t;
1079 252138 : e->code = t->content;
1080 252138 : return e;
1081 : }
1082 :
1083 :
// Parse a brace-delimited statement list: "{" statement* "}".
// Throws parse_error if the next token is not "{".  A parse_error raised
// while parsing a contained statement is printed via print_error and then
// recovered from locally by discarding tokens up to and including the next
// ";" or "}" operator, after which parsing of the block resumes.
// Returns the new block, or 0 if input runs out during error recovery.
// NOTE(review): the `return 0` path does not free `pb`, and callers that
// store the result would receive a null body - confirm they cope.
// NOTE(review): when recovery consumes a "}", the outer loop still keeps
// looking for another "}", so that brace is not treated as closing this block.
1084 : block*
1085 1291575 : parser::parse_stmt_block ()
1086 : {
1087 1291575 : block* pb = new block;
1088 :
1089 1291575 : const token* t = next ();
1090 2583150 : if (! (t->type == tok_operator && t->content == "{"))
1091 0 : throw parse_error ("expected '{'");
1092 :
1093 1291575 : pb->tok = t;
1094 :
1095 5213975 : while (1)
1096 : {
1097 : try
1098 : {
1099 6505550 : t = peek ();
// A closing brace ends the block; anything else (including a null peek)
// is handed to parse_statement.
1100 6505550 : if (t && t->type == tok_operator && t->content == "}")
1101 : {
1102 1291548 : next ();
1103 : break;
1104 : }
1105 :
1106 5214002 : pb->statements.push_back (parse_statement ());
1107 : }
1108 64 : catch (parse_error& pe)
1109 : {
1110 64 : print_error (pe);
1111 :
1112 : // Quietly swallow all tokens until the next ';' or '}'.
1113 152 : while (1)
1114 : {
// This `t` shadows the function-level `t`.
1115 216 : const token* t = peek ();
1116 243 : if (! t) return 0;
1117 189 : next ();
1118 189 : if (t->type == tok_operator
1119 : && (t->content == "}" || t->content == ";"))
1120 37 : break;
1121 : }
1122 : }
1123 : }
1124 :
1125 1291548 : return pb;
1126 : }
1127 :
1128 :
// Parse a single statement, dispatching on the peeked (not yet consumed)
// token:
//   ";"                         -> null_statement (the ";" is consumed here)
//   "{"                         -> parse_stmt_block
//   keyword if/for/foreach/return/delete/while/break/continue/next
//                               -> the matching parse_* routine
//   operator, identifier, number or string token -> parse_expr_statement
// Anything else, including a null peek, throws parse_error
// ("expected statement").
1129 : statement*
1130 5801760 : parser::parse_statement ()
1131 : {
1132 5801760 : const token* t = peek ();
1133 5801760 : if (t && t->type == tok_operator && t->content == ";")
1134 : {
1135 17280 : null_statement* n = new null_statement ();
1136 17280 : n->tok = next ();
1137 17280 : return n;
1138 : }
1139 5784480 : else if (t && t->type == tok_operator && t->content == "{")
1140 54671 : return parse_stmt_block ();
1141 5729809 : else if (t && t->type == tok_keyword && t->content == "if")
1142 540578 : return parse_if_statement ();
1143 5189231 : else if (t && t->type == tok_keyword && t->content == "for")
1144 1312 : return parse_for_loop ();
1145 5187919 : else if (t && t->type == tok_keyword && t->content == "foreach")
1146 5610 : return parse_foreach_loop ();
1147 5182309 : else if (t && t->type == tok_keyword && t->content == "return")
1148 479445 : return parse_return_statement ();
1149 4702864 : else if (t && t->type == tok_keyword && t->content == "delete")
1150 6647 : return parse_delete_statement ();
1151 4696217 : else if (t && t->type == tok_keyword && t->content == "while")
1152 25 : return parse_while_loop ();
1153 4696192 : else if (t && t->type == tok_keyword && t->content == "break")
1154 20 : return parse_break_statement ();
1155 4696172 : else if (t && t->type == tok_keyword && t->content == "continue")
1156 20 : return parse_continue_statement ();
1157 4696152 : else if (t && t->type == tok_keyword && t->content == "next")
1158 2179 : return parse_next_statement ();
1159 : // XXX: "do/while" statement?
1160 4693973 : else if (t && (t->type == tok_operator || // expressions are flexible
1161 : t->type == tok_identifier ||
1162 : t->type == tok_number ||
1163 : t->type == tok_string))
1164 4693947 : return parse_expr_statement ();
1165 : // XXX: consider generally accepting tok_embedded here too
1166 : else
1167 26 : throw parse_error ("expected statement");
1168 : }
1169 :
1170 :
// Parse a "global" declaration:
//   "global" name [ "[" size "]" ] [ "=" literal ] { "," name ... }
// Each declared name is appended to `globals` as a new vardecl.  The second
// (unnamed) parameter is not used.
// Throws parse_error when the keyword or an identifier is missing, when a
// name is already present in `globals`, when an array size is outside
// 1..1000000, or when an initializer is given to a global that is not
// compatible with arity 0.
// NOTE(review): the vardecl is pushed onto `globals` before the size and
// initializer checks, so it stays in the vector if one of those throws.
1171 : void
1172 15684 : parser::parse_global (vector <vardecl*>& globals, vector<probe*>&)
1173 : {
1174 15684 : const token* t0 = next ();
1175 15684 : if (! (t0->type == tok_keyword && t0->content == "global"))
1176 0 : throw parse_error ("expected 'global'");
1177 :
1178 14352 : while (1)
1179 : {
1180 30036 : const token* t = next ();
1181 30036 : if (! (t->type == tok_identifier))
1182 1 : throw parse_error ("expected identifier");
1183 :
// Linear scan of everything collected so far to reject duplicates.
1184 103448 : for (unsigned i=0; i<globals.size(); i++)
1185 73414 : if (globals[i]->name == t->content)
1186 1 : throw parse_error ("duplicate global name");
1187 :
1188 30034 : vardecl* d = new vardecl;
1189 30034 : d->name = t->content;
1190 30034 : d->tok = t;
1191 30034 : globals.push_back (d);
1192 :
1193 30034 : t = peek ();
1194 :
1195 60062 : if (t && t->type == tok_operator && t->content == "[") // array size
1196 : {
1197 : int64_t size;
1198 11917 : next ();
1199 11917 : expect_number(size);
1200 11917 : if (size <= 0 || size > 1000000) // arbitrary max
1201 2 : throw parse_error("array size out of range");
1202 11915 : d->maxsize = (int)size;
1203 11915 : expect_known(tok_operator, "]");
1204 11915 : t = peek ();
1205 : }
1206 :
1207 30032 : if (t && t->type == tok_operator && t->content == "=") // initialization
1208 : {
1209 3273 : if (!d->compatible_arity(0))
1210 2 : throw parse_error("only scalar globals can be initialized");
1211 3271 : d->set_arity(0);
1212 3271 : next ();
1213 3271 : d->init = parse_literal ();
// The declared type is taken from the initializer literal.
1214 3271 : d->type = d->init->type;
1215 3271 : t = peek ();
1216 : }
1217 :
1218 30030 : if (t && t->type == tok_operator && t->content == ",") // next global
1219 : {
1220 14352 : next ();
1221 : continue;
1222 : }
1223 : else
1224 15678 : break;
1225 : }
1226 15678 : }
1227 :
1228 :
// Parse a function declaration and append it to `functions`:
//   "function" name [ ":" ("string"|"long") ]
//              "(" [ arg [ ":" ("string"|"long") ] { "," arg ... } ] ")"
//              ( embedded-code | statement-block )
// The function name may be an identifier or one of the keywords "string" /
// "long".  Throws parse_error on any syntax error or if a function of the
// same name is already in `functions`.  The functiondecl is only pushed
// onto `functions` once its body has been parsed.
// NOTE(review): `fd` and the formal-argument vardecls are heap-allocated
// before later checks, so they look leaked when a parse_error is thrown.
// NOTE(review): because the ")" test runs at the top of every iteration, a
// trailing comma such as "f(a,)" appears to be accepted - confirm intent.
1229 : void
1230 298877 : parser::parse_functiondecl (std::vector<functiondecl*>& functions)
1231 : {
1232 298877 : const token* t = next ();
1233 298877 : if (! (t->type == tok_keyword && t->content == "function"))
1234 0 : throw parse_error ("expected 'function'");
1235 :
1236 :
1237 298877 : t = next ();
1238 298877 : if (! (t->type == tok_identifier)
1239 : && ! (t->type == tok_keyword
1240 : && (t->content == "string" || t->content == "long")))
1241 2 : throw parse_error ("expected identifier");
1242 :
// Linear scan of the functions collected so far to reject duplicates.
1243 4353297 : for (unsigned i=0; i<functions.size(); i++)
1244 4054423 : if (functions[i]->name == t->content)
1245 1 : throw parse_error ("duplicate function name");
1246 :
1247 298874 : functiondecl *fd = new functiondecl ();
1248 298874 : fd->name = t->content;
1249 298874 : fd->tok = t;
1250 :
// Optional return type annotation.
1251 298874 : t = next ();
1252 597748 : if (t->type == tok_operator && t->content == ":")
1253 : {
1254 226174 : t = next ();
1255 226174 : if (t->type == tok_keyword && t->content == "string")
1256 78994 : fd->type = pe_string;
1257 147180 : else if (t->type == tok_keyword && t->content == "long")
1258 147179 : fd->type = pe_long;
1259 1 : else throw parse_error ("expected 'string' or 'long'");
1260 :
1261 226173 : t = next ();
1262 : }
1263 :
1264 298873 : if (! (t->type == tok_operator && t->content == "("))
1265 1 : throw parse_error ("expected '('");
1266 :
// Formal argument list; each argument may carry its own type annotation.
1267 43305 : while (1)
1268 : {
1269 342177 : t = next ();
1270 :
1271 : // permit zero-argument functions
1272 342177 : if (t->type == tok_operator && t->content == ")")
1273 40170 : break;
1274 302007 : else if (! (t->type == tok_identifier))
1275 1 : throw parse_error ("expected identifier");
1276 302006 : vardecl* vd = new vardecl;
1277 302006 : vd->name = t->content;
1278 302006 : vd->tok = t;
1279 302006 : fd->formal_args.push_back (vd);
1280 :
1281 302006 : t = next ();
1282 604011 : if (t->type == tok_operator && t->content == ":")
1283 : {
1284 235887 : t = next ();
1285 235887 : if (t->type == tok_keyword && t->content == "string")
1286 20565 : vd->type = pe_string;
1287 215322 : else if (t->type == tok_keyword && t->content == "long")
1288 215321 : vd->type = pe_long;
1289 1 : else throw parse_error ("expected 'string' or 'long'");
1290 :
1291 235886 : t = next ();
1292 : }
1293 302005 : if (t->type == tok_operator && t->content == ")")
1294 258699 : break;
1295 43306 : if (t->type == tok_operator && t->content == ",")
1296 43305 : continue;
1297 : else
1298 1 : throw parse_error ("expected ',' or ')'");
1299 : }
1300 :
// The body is either a single embedded-code token or a statement block.
1301 298869 : t = peek ();
1302 518543 : if (t && t->type == tok_embedded)
1303 219675 : fd->body = parse_embeddedcode ();
1304 : else
1305 79194 : fd->body = parse_stmt_block ();
1306 :
1307 298868 : functions.push_back (fd);
1308 298868 : }
1309 :
1310 :
// Parse one probe point specification into a new probe_point:
//   component { "." component } [ "?" | "!" ] [ "if" "(" expr ")" ]
// where component is an identifier or keyword token, optionally followed by
// "(" literal ")".  Tokens for component names are fetched with next(true)
// (wildcard scanning, per the in-line comment).
// Parsing stops successfully only when the following (unconsumed) token is
// one of the operators "{", ",", "=" or "+="; otherwise parse_error is
// thrown.  "?" sets optional; "!" sets both optional and sufficient.
// NOTE(review): at the "if" handling the checks are written `t && !(...)`,
// so a null peek skips the throw and goes straight to next(); this relies
// on next() itself reporting end of input - confirm.
// NOTE(review): `pl` and its components are heap-allocated and look leaked
// when a parse_error is thrown part-way through.
1311 : probe_point*
1312 2983435 : parser::parse_probe_point ()
1313 : {
1314 2983435 : probe_point* pl = new probe_point;
1315 :
1316 4979932 : while (1)
1317 : {
1318 7963367 : const token* t = next (true); // wildcard scanning here
1319 7963367 : if (! (t->type == tok_identifier
1320 : // we must allow ".return" and ".function", which are keywords
1321 : || t->type == tok_keyword))
1322 3 : throw parse_error ("expected identifier or '*'");
1323 :
// The probe point's own token is the first component's token.
1324 7963364 : if (pl->tok == 0) pl->tok = t;
1325 :
1326 7963364 : probe_point::component* c = new probe_point::component;
1327 7963364 : c->functor = t->content;
1328 7963364 : pl->components.push_back (c);
1329 : // NB we may add c->arg soon
1330 :
1331 7963364 : t = peek ();
1332 :
1333 : // consume optional parameter
1334 15926728 : if (t && t->type == tok_operator && t->content == "(")
1335 : {
1336 1764527 : next (); // consume "("
1337 1764527 : c->arg = parse_literal ();
1338 :
1339 1764523 : t = next ();
1340 1764523 : if (! (t->type == tok_operator && t->content == ")"))
1341 2 : throw parse_error ("expected ')'");
1342 :
1343 1764521 : t = peek ();
1344 : }
1345 :
1346 7963358 : if (t && t->type == tok_operator && t->content == ".")
1347 : {
1348 4979932 : next ();
1349 : continue;
1350 : }
1351 :
1352 : // We only fall through here at the end of a probe point (past
1353 : // all the dotted/parametrized components).
1354 :
1355 2983426 : if (t && t->type == tok_operator &&
1356 : (t->content == "?" || t->content == "!"))
1357 : {
1358 1110320 : pl->optional = true;
1359 1110320 : if (t->content == "!") pl->sufficient = true;
1360 : // NB: sufficient implies optional
1361 1110320 : next ();
1362 1110320 : t = peek ();
1363 : // fall through
1364 : }
1365 :
// Optional condition: "if" "(" expression ")".
1366 2983426 : if (t && t->type == tok_keyword && t->content == "if")
1367 : {
1368 58 : next ();
1369 58 : t = peek ();
1370 58 : if (t && ! (t->type == tok_operator && t->content == "("))
1371 0 : throw parse_error ("expected '('");
1372 58 : next ();
1373 :
1374 57 : pl->condition = parse_expression ();
1375 :
1376 57 : t = peek ();
1377 57 : if (t && ! (t->type == tok_operator && t->content == ")"))
1378 1 : throw parse_error ("expected ')'");
1379 56 : next ();
1380 56 : t = peek ();
1381 : // fall through
1382 : }
1383 :
// The terminating token is only inspected here, not consumed.
1384 2983424 : if (t && t->type == tok_operator
1385 : && (t->content == "{" || t->content == "," ||
1386 : t->content == "=" || t->content == "+=" ))
1387 2983417 : break;
1388 :
1389 7 : throw parse_error ("expected one of '. , ( ? ! { = +='");
1390 : }
1391 :
1392 2983417 : return pl;
1393 : }
1394 :
1395 :
// Parse a literal: either a string token (-> literal_string) or a number
// token optionally preceded by a "-" operator (-> literal_number).
// Numbers are converted with strtoull using base 0, i.e. the prefix of the
// digit string selects the radix.  Throws parse_error when the conversion
// reports ERANGE/EINVAL, leaves unconsumed characters, or when a negated
// magnitude exceeds 9223372036854775808; throws a different parse_error if
// the token is neither a string nor a number.
// The literal's tok is the last token consumed, so for a negated number it
// is the number token rather than the "-" token.
// NOTE(review): the last two range comparisons compare an unsigned long
// long against its largest value and a long long against its smallest
// value, so they look like they can never be true.
// NOTE(review): for the input "-9223372036854775808" `value` is already the
// most negative long long before `value = -value`; negating it is signed
// overflow - confirm this is acceptable on the supported compilers.
1396 : literal*
1397 5288341 : parser::parse_literal ()
1398 : {
1399 5288341 : const token* t = next ();
1400 : literal* l;
1401 5288341 : if (t->type == tok_string)
1402 3677543 : l = new literal_string (t->content);
1403 : else
1404 : {
1405 1610798 : bool neg = false;
1406 1610798 : if (t->type == tok_operator && t->content == "-")
1407 : {
1408 3265 : neg = true;
1409 3265 : t = next ();
1410 : }
1411 :
1412 1610798 : if (t->type == tok_number)
1413 : {
1414 1610788 : const char* startp = t->content.c_str ();
1415 1610788 : char* endp = (char*) startp;
1416 :
1417 : // NB: we allow controlled overflow from LLONG_MIN .. ULLONG_MAX
1418 : // Actually, this allows all the way from -ULLONG_MAX to ULLONG_MAX,
1419 : // since the lexer only gives us positive digit strings, but we'll
1420 : // limit it to LLONG_MIN when a '-' operator is fed into the literal.
// errno is cleared first so that a stale value is not mistaken for a
// conversion failure.
1421 1610788 : errno = 0;
1422 1610788 : long long value = (long long) strtoull (startp, & endp, 0);
1423 1610788 : if (errno == ERANGE || errno == EINVAL || *endp != '\0'
1424 : || (neg && (unsigned long long) value > 9223372036854775808ULL)
1425 : || (unsigned long long) value > 18446744073709551615ULL
1426 : || value < -9223372036854775807LL-1)
1427 5 : throw parse_error ("number invalid or out of range");
1428 :
1429 1610783 : if (neg)
1430 3265 : value = -value;
1431 :
1432 1610783 : l = new literal_number (value);
1433 : }
1434 : else
1435 10 : throw parse_error ("expected literal string or number");
1436 : }
1437 :
1438 5288326 : l->tok = t;
1439 5288326 : return l;
1440 : }
1441 :
1442 :
// Parse:  "if" "(" expression ")" statement [ "else" statement ]
// Returns a new if_statement whose tok is the "if" keyword token.
// elseblock is explicitly set to 0 when there is no "else" branch.
// An "else" is consumed as soon as it follows the then-statement, so it
// binds to the innermost "if" being parsed.
// Throws parse_error if "if", "(" or ")" is missing.
// NOTE(review): `s` is allocated before the later checks and looks leaked
// when one of them throws.
1443 : if_statement*
1444 540578 : parser::parse_if_statement ()
1445 : {
1446 540578 : const token* t = next ();
1447 540578 : if (! (t->type == tok_keyword && t->content == "if"))
1448 0 : throw parse_error ("expected 'if'");
1449 540578 : if_statement* s = new if_statement;
1450 540578 : s->tok = t;
1451 :
1452 540578 : t = next ();
1453 1081155 : if (! (t->type == tok_operator && t->content == "("))
1454 1 : throw parse_error ("expected '('");
1455 :
1456 540577 : s->condition = parse_expression ();
1457 :
1458 540577 : t = next ();
1459 540577 : if (! (t->type == tok_operator && t->content == ")"))
1460 1 : throw parse_error ("expected ')'");
1461 :
1462 540576 : s->thenblock = parse_statement ();
1463 :
1464 540576 : t = peek ();
1465 540576 : if (t && t->type == tok_keyword && t->content == "else")
1466 : {
1467 40251 : next ();
1468 40251 : s->elseblock = parse_statement ();
1469 : }
1470 : else
1471 500325 : s->elseblock = 0; // in case not otherwise initialized
1472 :
1473 540576 : return s;
1474 : }
1475 :
1476 :
// Parse an expression used as a statement.  The statement's tok is the
// first (peeked) token of the expression; no terminating ";" is consumed
// here.
1477 : expr_statement*
1478 4696499 : parser::parse_expr_statement ()
1479 : {
1480 4696499 : expr_statement *es = new expr_statement;
1481 4696499 : const token* t = peek ();
1482 4696499 : es->tok = t;
1483 4696499 : es->value = parse_expression ();
1484 4696481 : return es;
1485 : }
1486 :
1487 :
// Parse:  "return" expression
// Throws parse_error if the keyword is missing or if the parser's current
// context is not con_function.  An expression is always parsed after the
// keyword, so a bare "return" is not accepted by this routine.
1488 : return_statement*
1489 479445 : parser::parse_return_statement ()
1490 : {
1491 479445 : const token* t = next ();
1492 479445 : if (! (t->type == tok_keyword && t->content == "return"))
1493 0 : throw parse_error ("expected 'return'");
1494 479445 : if (context != con_function)
1495 1 : throw parse_error ("found 'return' not in function context");
1496 479444 : return_statement* s = new return_statement;
1497 479444 : s->tok = t;
1498 479444 : s->value = parse_expression ();
1499 479444 : return s;
1500 : }
1501 :
1502 :
// Parse:  "delete" expression
// The operand is parsed as a general expression; this routine does not
// check what kind of expression it is.
// Throws parse_error if the keyword is missing.
1503 : delete_statement*
1504 6647 : parser::parse_delete_statement ()
1505 : {
1506 6647 : const token* t = next ();
1507 6647 : if (! (t->type == tok_keyword && t->content == "delete"))
1508 0 : throw parse_error ("expected 'delete'");
1509 6647 : delete_statement* s = new delete_statement;
1510 6647 : s->tok = t;
1511 6647 : s->value = parse_expression ();
1512 6647 : return s;
1513 : }
1514 :
1515 :
// Parse the "next" statement (keyword only, no operand).
// Throws parse_error if the keyword is missing or if the parser's current
// context is not con_probe.
1516 : next_statement*
1517 2179 : parser::parse_next_statement ()
1518 : {
1519 2179 : const token* t = next ();
1520 2179 : if (! (t->type == tok_keyword && t->content == "next"))
1521 0 : throw parse_error ("expected 'next'");
1522 2179 : if (context != con_probe)
1523 1 : throw parse_error ("found 'next' not in probe context");
1524 2178 : next_statement* s = new next_statement;
1525 2178 : s->tok = t;
1526 2178 : return s;
1527 : }
1528 :
1529 :
// Parse the "break" statement (keyword only).
// Throws parse_error if the keyword is missing.  No check is made here that
// the statement appears inside a loop.
1530 : break_statement*
1531 20 : parser::parse_break_statement ()
1532 : {
1533 20 : const token* t = next ();
1534 20 : if (! (t->type == tok_keyword && t->content == "break"))
1535 0 : throw parse_error ("expected 'break'");
1536 20 : break_statement* s = new break_statement;
1537 20 : s->tok = t;
1538 20 : return s;
1539 : }
1540 :
1541 :
// Parse the "continue" statement (keyword only).
// Throws parse_error if the keyword is missing.  No check is made here that
// the statement appears inside a loop.
1542 : continue_statement*
1543 20 : parser::parse_continue_statement ()
1544 : {
1545 20 : const token* t = next ();
1546 20 : if (! (t->type == tok_keyword && t->content == "continue"))
1547 0 : throw parse_error ("expected 'continue'");
1548 20 : continue_statement* s = new continue_statement;
1549 20 : s->tok = t;
1550 20 : return s;
1551 : }
1552 :
1553 :
// Parse:  "for" "(" [init] ";" [cond] ";" [incr] ")" statement
// init and incr are parsed as expression statements and are set to 0 when
// omitted.  An omitted condition is replaced by literal_number(1), whose
// tok is the ";" token that ends the empty condition.
// Throws parse_error if "for", "(", either ";" or ")" is missing.
// NOTE(review): `s` is allocated before the later checks and looks leaked
// when one of them throws.
1554 : for_loop*
1555 1312 : parser::parse_for_loop ()
1556 : {
1557 1312 : const token* t = next ();
1558 1312 : if (! (t->type == tok_keyword && t->content == "for"))
1559 0 : throw parse_error ("expected 'for'");
1560 1312 : for_loop* s = new for_loop;
1561 1312 : s->tok = t;
1562 :
1563 1312 : t = next ();
1564 2623 : if (! (t->type == tok_operator && t->content == "("))
1565 1 : throw parse_error ("expected '('");
1566 :
1567 : // initializer + ";"
1568 1311 : t = peek ();
1569 1311 : if (t && t->type == tok_operator && t->content == ";")
1570 : {
1571 32 : s->init = 0;
1572 32 : next ();
1573 : }
1574 : else
1575 : {
1576 1279 : s->init = parse_expr_statement ();
1577 1279 : t = next ();
1578 1279 : if (! (t->type == tok_operator && t->content == ";"))
1579 1 : throw parse_error ("expected ';'");
1580 : }
1581 :
1582 : // condition + ";"
1583 1310 : t = peek ();
1584 1310 : if (t && t->type == tok_operator && t->content == ";")
1585 : {
1586 16 : literal_number* l = new literal_number(1);
1587 16 : s->cond = l;
1588 16 : s->cond->tok = next ();
1589 : }
1590 : else
1591 : {
1592 1294 : s->cond = parse_expression ();
1593 1294 : t = next ();
1594 1294 : if (! (t->type == tok_operator && t->content == ";"))
1595 1 : throw parse_error ("expected ';'");
1596 : }
1597 :
1598 : // increment + ")"
1599 1309 : t = peek ();
1600 1309 : if (t && t->type == tok_operator && t->content == ")")
1601 : {
1602 36 : s->incr = 0;
1603 36 : next ();
1604 : }
1605 : else
1606 : {
1607 1273 : s->incr = parse_expr_statement ();
1608 1273 : t = next ();
1609 1273 : if (! (t->type == tok_operator && t->content == ")"))
1610 1 : throw parse_error ("expected ')'");
1611 : }
1612 :
1613 : // block
1614 1308 : s->block = parse_statement ();
1615 :
1616 1308 : return s;
1617 : }
1618 :
1619 :
// Parse:  "while" "(" expression ")" statement
// The result is represented as a for_loop with init and incr set to 0, so
// no separate while-loop node type is produced here.
// Throws parse_error if "while", "(" or ")" is missing.
1620 : for_loop*
1621 25 : parser::parse_while_loop ()
1622 : {
1623 25 : const token* t = next ();
1624 25 : if (! (t->type == tok_keyword && t->content == "while"))
1625 0 : throw parse_error ("expected 'while'");
1626 25 : for_loop* s = new for_loop;
1627 25 : s->tok = t;
1628 :
1629 25 : t = next ();
1630 49 : if (! (t->type == tok_operator && t->content == "("))
1631 1 : throw parse_error ("expected '('");
1632 :
1633 : // dummy init and incr fields
1634 24 : s->init = 0;
1635 24 : s->incr = 0;
1636 :
1637 : // condition
1638 24 : s->cond = parse_expression ();
1639 :
1640 24 : t = next ();
1641 24 : if (! (t->type == tok_operator && t->content == ")"))
1642 1 : throw parse_error ("expected ')'");
1643 :
1644 : // block
1645 23 : s->block = parse_statement ();
1646 :
1647 23 : return s;
1648 : }
1649 :
1650 :
// Parse:
//   "foreach" "(" ( ident[+|-] | "[" ident[+|-] { "," ident[+|-] } "]" )
//             "in" indexable[+|-] [ "limit" expression ] ")" statement
// At most one "+"/"-" sort directive may appear in total; a second one
// throws parse_error ("multiple sort directives").  sort_direction is 1 for
// "+" and -1 for "-" (0 when absent).  sort_column is the 1-based position
// of the marked index, or 0 when the directive follows the base.
// Despite its name, `parenthesized` records whether the index list was
// opened with "[".
// NOTE(review): sort_column is only assigned when a directive is present;
// whether it has a defined value otherwise depends on foreach_loop's
// constructor, which is not visible here.
1651 : foreach_loop*
1652 5610 : parser::parse_foreach_loop ()
1653 : {
1654 5610 : const token* t = next ();
1655 5610 : if (! (t->type == tok_keyword && t->content == "foreach"))
1656 0 : throw parse_error ("expected 'foreach'");
1657 5610 : foreach_loop* s = new foreach_loop;
1658 5610 : s->tok = t;
1659 5610 : s->sort_direction = 0;
1660 5610 : s->limit = NULL;
1661 :
1662 5610 : t = next ();
1663 11219 : if (! (t->type == tok_operator && t->content == "("))
1664 1 : throw parse_error ("expected '('");
1665 :
1666 : // see also parse_array_in
1667 :
1668 5609 : bool parenthesized = false;
1669 5609 : t = peek ();
1670 5609 : if (t && t->type == tok_operator && t->content == "[")
1671 : {
1672 69 : next ();
1673 69 : parenthesized = true;
1674 : }
1675 :
// Collect the index variable(s); each becomes a symbol in s->indexes.
1676 60 : while (1)
1677 : {
1678 5669 : t = next ();
1679 5669 : if (! (t->type == tok_identifier))
1680 1 : throw parse_error ("expected identifier");
1681 5668 : symbol* sym = new symbol;
1682 5668 : sym->tok = t;
1683 5668 : sym->name = t->content;
1684 5668 : s->indexes.push_back (sym);
1685 :
1686 5668 : t = peek ();
1687 11336 : if (t && t->type == tok_operator &&
1688 : (t->content == "+" || t->content == "-"))
1689 : {
1690 34 : if (s->sort_direction)
1691 1 : throw parse_error ("multiple sort directives");
1692 33 : s->sort_direction = (t->content == "+") ? 1 : -1;
1693 33 : s->sort_column = s->indexes.size();
1694 33 : next();
1695 : }
1696 :
1697 5667 : if (parenthesized)
1698 : {
1699 128 : t = peek ();
1700 128 : if (t && t->type == tok_operator && t->content == ",")
1701 : {
1702 60 : next ();
1703 : continue;
1704 : }
1705 68 : else if (t && t->type == tok_operator && t->content == "]")
1706 : {
1707 67 : next ();
1708 67 : break;
1709 : }
1710 : else
1711 1 : throw parse_error ("expected ',' or ']'");
1712 : }
1713 : else
1714 5539 : break; // expecting only one expression
1715 : }
1716 :
1717 5606 : t = next ();
1718 5606 : if (! (t->type == tok_keyword && t->content == "in"))
1719 1 : throw parse_error ("expected 'in'");
1720 :
1721 5605 : s->base = parse_indexable();
1722 :
// Optional sort directive attached to the base (sort_column 0).
1723 5604 : t = peek ();
1724 5604 : if (t && t->type == tok_operator &&
1725 : (t->content == "+" || t->content == "-"))
1726 : {
1727 1120 : if (s->sort_direction)
1728 1 : throw parse_error ("multiple sort directives");
1729 1119 : s->sort_direction = (t->content == "+") ? 1 : -1;
1730 1119 : s->sort_column = 0;
1731 1119 : next();
1732 : }
1733 :
1734 5603 : t = peek ();
1735 5603 : if (tok_is(t, tok_keyword, "limit"))
1736 : {
1737 39 : next (); // get past the "limit"
1738 39 : s->limit = parse_expression ();
1739 : }
1740 :
1741 5602 : t = next ();
1742 5602 : if (! (t->type == tok_operator && t->content == ")"))
1743 2 : throw parse_error ("expected ')'");
1744 :
1745 5600 : s->block = parse_statement ();
1746 5600 : return s;
1747 : }
1748 :
1749 :
// Entry point for parsing a full expression; simply delegates to
// parse_assignment, the first level of the chain of parse_* routines.
1750 : expression*
1751 13887324 : parser::parse_expression ()
1752 : {
1753 13887324 : return parse_assignment ();
1754 : }
1755 :
1756 :
// Parse an optional assignment:  ternary [ assign-op expression ]
// Recognized operators are "=", "<<<" and the compound forms listed below.
// The right-hand side is parsed by recursing into parse_expression, which
// is what makes these operators right-associative.  Returns the plain
// ternary result when no assignment operator follows.
1757 : expression*
1758 13887324 : parser::parse_assignment ()
1759 : {
1760 13887324 : expression* op1 = parse_ternary ();
1761 :
1762 13887304 : const token* t = peek ();
1763 : // right-associative operators
1764 13887304 : if (t && t->type == tok_operator
1765 : && (t->content == "=" ||
1766 : t->content == "<<<" ||
1767 : t->content == "+=" ||
1768 : t->content == "-=" ||
1769 : t->content == "*=" ||
1770 : t->content == "/=" ||
1771 : t->content == "%=" ||
1772 : t->content == "<<=" ||
1773 : t->content == ">>=" ||
1774 : t->content == "&=" ||
1775 : t->content == "^=" ||
1776 : t->content == "|=" ||
1777 : t->content == ".=" ||
1778 : false))
1779 : {
1780 : // NB: lvalueness is checked during elaboration / translation
1781 4675489 : assignment* e = new assignment;
1782 4675489 : e->left = op1;
1783 4675489 : e->op = t->content;
1784 4675489 : e->tok = t;
1785 4675489 : next ();
1786 4675489 : e->right = parse_expression ();
1787 4675481 : op1 = e;
1788 : }
1789 :
1790 13887296 : return op1;
1791 : }
1792 :
1793 :
// Parse an optional conditional:  logical_or [ "?" expression ":" expression ]
// Both branches are parsed with parse_expression (flagged XXX by the
// original author).  The node's tok is the "?" token.
// Throws parse_error if ":" does not follow the true-branch.
1794 : expression*
1795 13887324 : parser::parse_ternary ()
1796 : {
1797 13887324 : expression* op1 = parse_logical_or ();
1798 :
1799 13887305 : const token* t = peek ();
1800 13887305 : if (t && t->type == tok_operator && t->content == "?")
1801 : {
1802 20589 : ternary_expression* e = new ternary_expression;
1803 20589 : e->tok = t;
1804 20589 : e->cond = op1;
1805 20589 : next ();
1806 20589 : e->truevalue = parse_expression (); // XXX
1807 :
1808 20589 : t = next ();
1809 41178 : if (! (t->type == tok_operator && t->content == ":"))
1810 1 : throw parse_error ("expected ':'");
1811 :
1812 20588 : e->falsevalue = parse_expression (); // XXX
1813 20588 : return e;
1814 : }
1815 : else
1816 13866716 : return op1;
1817 : }
1818 :
1819 :
// Parse:  logical_and { "||" logical_and }
// Each iteration wraps the expression built so far as the left operand of a
// new logical_or_expr, giving a left-associative chain.
1820 : expression*
1821 13887324 : parser::parse_logical_or ()
1822 : {
1823 13887324 : expression* op1 = parse_logical_and ();
1824 :
1825 13887305 : const token* t = peek ();
1826 27775705 : while (t && t->type == tok_operator && t->content == "||")
1827 : {
1828 1095 : logical_or_expr* e = new logical_or_expr;
1829 1095 : e->tok = t;
1830 1095 : e->op = t->content;
1831 1095 : e->left = op1;
1832 1095 : next ();
1833 1095 : e->right = parse_logical_and ();
1834 1095 : op1 = e;
1835 1095 : t = peek ();
1836 : }
1837 :
1838 13887305 : return op1;
1839 : }
1840 :
1841 :
// Parse:  boolean_or { "&&" boolean_or }
// Builds a left-associative chain of logical_and_expr nodes.
1842 : expression*
1843 13888419 : parser::parse_logical_and ()
1844 : {
1845 13888419 : expression* op1 = parse_boolean_or ();
1846 :
1847 13888400 : const token* t = peek ();
1848 27780076 : while (t && t->type == tok_operator && t->content == "&&")
1849 : {
1850 3276 : logical_and_expr *e = new logical_and_expr;
1851 3276 : e->left = op1;
1852 3276 : e->op = t->content;
1853 3276 : e->tok = t;
1854 3276 : next ();
1855 3276 : e->right = parse_boolean_or ();
1856 3276 : op1 = e;
1857 3276 : t = peek ();
1858 : }
1859 :
1860 13888400 : return op1;
1861 : }
1862 :
1863 :
// Parse:  boolean_xor { "|" boolean_xor }
// Builds a left-associative chain of binary_expression nodes for "|".
1864 : expression*
1865 13891695 : parser::parse_boolean_or ()
1866 : {
1867 13891695 : expression* op1 = parse_boolean_xor ();
1868 :
1869 13891676 : const token* t = peek ();
1870 27783365 : while (t && t->type == tok_operator && t->content == "|")
1871 : {
1872 13 : binary_expression* e = new binary_expression;
1873 13 : e->left = op1;
1874 13 : e->op = t->content;
1875 13 : e->tok = t;
1876 13 : next ();
1877 13 : e->right = parse_boolean_xor ();
1878 13 : op1 = e;
1879 13 : t = peek ();
1880 : }
1881 :
1882 13891676 : return op1;
1883 : }
1884 :
1885 :
// Parse:  boolean_and { "^" boolean_and }
// Builds a left-associative chain of binary_expression nodes for "^".
1886 : expression*
1887 13891708 : parser::parse_boolean_xor ()
1888 : {
1889 13891708 : expression* op1 = parse_boolean_and ();
1890 :
1891 13891689 : const token* t = peek ();
1892 27783391 : while (t && t->type == tok_operator && t->content == "^")
1893 : {
1894 13 : binary_expression* e = new binary_expression;
1895 13 : e->left = op1;
1896 13 : e->op = t->content;
1897 13 : e->tok = t;
1898 13 : next ();
1899 13 : e->right = parse_boolean_and ();
1900 13 : op1 = e;
1901 13 : t = peek ();
1902 : }
1903 :
1904 13891689 : return op1;
1905 : }
1906 :
1907 :
// Parse:  array_in { "&" array_in }
// Builds a left-associative chain of binary_expression nodes for "&".
1908 : expression*
1909 13891721 : parser::parse_boolean_and ()
1910 : {
1911 13891721 : expression* op1 = parse_array_in ();
1912 :
1913 13891702 : const token* t = peek ();
1914 27853748 : while (t && t->type == tok_operator && t->content == "&")
1915 : {
1916 70344 : binary_expression* e = new binary_expression;
1917 70344 : e->left = op1;
1918 70344 : e->op = t->content;
1919 70344 : e->tok = t;
1920 70344 : next ();
1921 70344 : e->right = parse_array_in ();
1922 70344 : op1 = e;
1923 70344 : t = peek ();
1924 : }
1925 :
1926 13891702 : return op1;
1927 : }
1928 :
1929 :
// Parse an array-membership test or fall through to a plain comparison:
//   ( comparison | "[" comparison { "," comparison } "]" ) [ "in" indexable ]
// With "in", returns an array_in whose operand is an arrayindex built from
// the collected index expressions and the parsed base; the arrayindex's tok
// is taken from the base.  Without "in", exactly one collected expression
// is required and is returned unchanged, so a bracketed single expression
// not followed by "in" yields just that expression; more than one throws
// parse_error.
// As in parse_foreach_loop, `parenthesized` actually records a leading "[".
1930 : expression*
1931 13962065 : parser::parse_array_in ()
1932 : {
1933 : // This is a very tricky case. All these are legit expressions:
1934 : // "a in b" "a+0 in b" "[a,b] in c" "[c,(d+0)] in b"
1935 13962065 : vector<expression*> indexes;
1936 13962065 : bool parenthesized = false;
1937 :
1938 27924128 : const token* t = peek ();
1939 13962063 : if (t && t->type == tok_operator && t->content == "[")
1940 : {
1941 1128 : next ();
1942 1128 : parenthesized = true;
1943 : }
1944 :
1945 34 : while (1)
1946 : {
1947 13962097 : expression* op1 = parse_comparison ();
1948 13962080 : indexes.push_back (op1);
1949 :
1950 13962080 : if (parenthesized)
1951 : {
// This `t` shadows the function-level `t`.
1952 1159 : const token* t = peek ();
1953 1159 : if (t && t->type == tok_operator && t->content == ",")
1954 : {
1955 34 : next ();
1956 : continue;
1957 : }
1958 1125 : else if (t && t->type == tok_operator && t->content == "]")
1959 : {
1960 1125 : next ();
1961 1125 : break;
1962 : }
1963 : else
1964 0 : throw parse_error ("expected ',' or ']'");
1965 : }
1966 : else
1967 13960921 : break; // expecting only one expression
1968 : }
1969 :
1970 13962046 : t = peek ();
1971 13962046 : if (t && t->type == tok_keyword && t->content == "in")
1972 : {
1973 14154 : array_in *e = new array_in;
1974 14154 : e->tok = t;
1975 14154 : next (); // swallow "in"
1976 :
1977 14154 : arrayindex* a = new arrayindex;
1978 14154 : a->indexes = indexes;
1979 14154 : a->base = parse_indexable();
1980 14154 : a->tok = a->base->get_tok();
1981 14154 : e->operand = a;
1982 14154 : return e;
1983 : }
1984 13947892 : else if (indexes.size() == 1) // no "in" - need one expression only
1985 13947892 : return indexes[0];
1986 : else
1987 0 : throw parse_error ("unexpected comma-separated expression list");
1988 : }
1989 :
1990 :
// Parse:  shift { (">" | "<" | "==" | "!=" | "<=" | ">=") shift }
// Builds a left-associative chain of comparison nodes, so chained
// comparisons such as "a < b < c" are accepted and nest to the left.
1991 : expression*
1992 13962097 : parser::parse_comparison ()
1993 : {
1994 13962097 : expression* op1 = parse_shift ();
1995 :
1996 13962080 : const token* t = peek ();
1997 28411697 : while (t && t->type == tok_operator
1998 : && (t->content == ">" ||
1999 : t->content == "<" ||
2000 : t->content == "==" ||
2001 : t->content == "!=" ||
2002 : t->content == "<=" ||
2003 : t->content == ">="))
2004 : {
2005 487537 : comparison* e = new comparison;
2006 487537 : e->left = op1;
2007 487537 : e->op = t->content;
2008 487537 : e->tok = t;
2009 487537 : next ();
2010 487537 : e->right = parse_shift ();
2011 487537 : op1 = e;
2012 487537 : t = peek ();
2013 : }
2014 :
2015 13962080 : return op1;
2016 : }
2017 :
2018 :
// Parse:  concatenation { ("<<" | ">>") concatenation }
// Builds a left-associative chain of binary_expression nodes.
2019 : expression*
2020 14449634 : parser::parse_shift ()
2021 : {
2022 14449634 : expression* op1 = parse_concatenation ();
2023 :
2024 14449617 : const token* t = peek ();
2025 28901431 : while (t && t->type == tok_operator &&
2026 : (t->content == "<<" || t->content == ">>"))
2027 : {
2028 2197 : binary_expression* e = new binary_expression;
2029 2197 : e->left = op1;
2030 2197 : e->op = t->content;
2031 2197 : e->tok = t;
2032 2197 : next ();
2033 2197 : e->right = parse_concatenation ();
2034 2197 : op1 = e;
2035 2197 : t = peek ();
2036 : }
2037 :
2038 14449617 : return op1;
2039 : }
2040 :
2041 :
// Parse:  additive { "." additive }
// Builds a left-associative chain of concatenation nodes; "." is the
// string-concatenation operator at this level.
2042 : expression*
2043 14451831 : parser::parse_concatenation ()
2044 : {
2045 14451831 : expression* op1 = parse_additive ();
2046 :
2047 14451814 : const token* t = peek ();
2048 : // XXX: the actual awk string-concatenation operator is *whitespace*.
2049 : // I don't know how to easily model that here.
2050 28965667 : while (t && t->type == tok_operator && t->content == ".")
2051 : {
2052 62039 : concatenation* e = new concatenation;
2053 62039 : e->left = op1;
2054 62039 : e->op = t->content;
2055 62039 : e->tok = t;
2056 62039 : next ();
2057 62039 : e->right = parse_additive ();
2058 62039 : op1 = e;
2059 62039 : t = peek ();
2060 : }
2061 :
2062 14451814 : return op1;
2063 : }
2064 :
2065 :
// Parse:  multiplicative { ("+" | "-") multiplicative }
// Builds a left-associative chain of binary_expression nodes.
2066 : expression*
2067 14513870 : parser::parse_additive ()
2068 : {
2069 14513870 : expression* op1 = parse_multiplicative ();
2070 :
2071 14513854 : const token* t = peek ();
2072 29052075 : while (t && t->type == tok_operator
2073 : && (t->content == "+" || t->content == "-"))
2074 : {
2075 24368 : binary_expression* e = new binary_expression;
2076 24368 : e->op = t->content;
2077 24368 : e->left = op1;
2078 24368 : e->tok = t;
2079 24368 : next ();
2080 24368 : e->right = parse_multiplicative ();
2081 24367 : op1 = e;
2082 24367 : t = peek ();
2083 : }
2084 :
2085 14513853 : return op1;
2086 : }
2087 :
2088 :
// Parse:  unary { ("*" | "/" | "%") unary }
// Builds a left-associative chain of binary_expression nodes.
2089 : expression*
2090 14538238 : parser::parse_multiplicative ()
2091 : {
2092 14538238 : expression* op1 = parse_unary ();
2093 :
2094 14538221 : const token* t = peek ();
2095 29100503 : while (t && t->type == tok_operator
2096 : && (t->content == "*" || t->content == "/" || t->content == "%"))
2097 : {
2098 24061 : binary_expression* e = new binary_expression;
2099 24061 : e->op = t->content;
2100 24061 : e->left = op1;
2101 24061 : e->tok = t;
2102 24061 : next ();
2103 24061 : e->right = parse_unary ();
2104 24061 : op1 = e;
2105 24061 : t = peek ();
2106 : }
2107 :
2108 14538221 : return op1;
2109 : }
2110 :
2111 :
// Parse an optional prefix operator:  [ "+" | "-" | "!" | "~" ] crement
// With a prefix operator, returns a unary_expression whose tok is the
// operator token; otherwise returns whatever parse_crement produces.
// NOTE(review): the operand is parsed by parse_crement rather than by a
// recursive call to parse_unary, so it looks like stacked prefix operators
// (e.g. "!!x") are not accepted here - confirm intent.
2112 : expression*
2113 14562299 : parser::parse_unary ()
2114 : {
2115 14562299 : const token* t = peek ();
2116 14562299 : if (t && t->type == tok_operator
2117 : && (t->content == "+" ||
2118 : t->content == "-" ||
2119 : t->content == "!" ||
2120 : t->content == "~" ||
2121 : false))
2122 : {
2123 30485 : unary_expression* e = new unary_expression;
2124 30485 : e->op = t->content;
2125 30485 : e->tok = t;
2126 30485 : next ();
2127 30485 : e->operand = parse_crement ();
2128 30484 : return e;
2129 : }
2130 : else
2131 14531814 : return parse_crement ();
2132 : }
2133 :
2134 :
// Parse a pre- or post-increment/decrement, or a plain value:
//   ("++" | "--") value   -> pre_crement
//   value ("++" | "--")   -> post_crement
//   value                 -> returned unchanged
// At most one postfix operator is consumed per call.
2135 : expression*
2136 14562299 : parser::parse_crement () // as in "increment" / "decrement"
2137 : {
2138 : // NB: Ideally, we'd parse only a symbol as an operand to the
2139 : // *crement operators, instead of a general expression value. We'd
2140 : // need more complex lookahead code to tell apart the postfix cases.
2141 : // So we just punt, and leave it to pass-3 to signal errors on
2142 : // cases like "4++".
2143 :
2144 14562299 : const token* t = peek ();
2145 14562299 : if (t && t->type == tok_operator
2146 : && (t->content == "++" || t->content == "--"))
2147 : {
2148 80 : pre_crement* e = new pre_crement;
2149 80 : e->op = t->content;
2150 80 : e->tok = t;
2151 80 : next ();
2152 80 : e->operand = parse_value ();
2153 80 : return e;
2154 : }
2155 :
2156 : // post-crement or non-crement
2157 14562219 : expression *op1 = parse_value ();
2158 :
2159 14562202 : t = peek ();
2160 14562202 : if (t && t->type == tok_operator
2161 : && (t->content == "++" || t->content == "--"))
2162 : {
2163 7022 : post_crement* e = new post_crement;
2164 7022 : e->op = t->content;
2165 7022 : e->tok = t;
2166 7022 : next ();
2167 7022 : e->operand = op1;
2168 7022 : return e;
2169 : }
2170 : else
2171 14555180 : return op1;
2172 : }
2173 :
2174 :
2175 : expression*
2176 14562299 : parser::parse_value ()
2177 : {
2178 14562299 : const token* t = peek ();
2179 14562299 : if (! t)
2180 0 : throw parse_error ("expected value");
2181 :
2182 14562299 : if (t->type == tok_operator && t->content == "(")
2183 : {
2184 44948 : next ();
2185 44948 : expression* e = parse_expression ();
2186 44948 : t = next ();
2187 44948 : if (! (t->type == tok_operator && t->content == ")"))
2188 0 : throw parse_error ("expected ')'");
2189 44948 : return e;
2190 : }
2191 14517351 : else if (t->type == tok_identifier)
2192 10996808 : return parse_symbol ();
2193 : else
2194 3520543 : return parse_literal ();
2195 : }
2196 :
2197 :
2198 : const token *
2199 11016668 : parser::parse_hist_op_or_bare_name (hist_op *&hop, string &name)
2200 : {
2201 11016668 : hop = NULL;
2202 11016668 : const token* t = expect_ident (name);
2203 11016667 : if (name == "@hist_linear" || name == "@hist_log")
2204 : {
2205 142 : hop = new hist_op;
2206 142 : if (name == "@hist_linear")
2207 57 : hop->htype = hist_linear;
2208 85 : else if (name == "@hist_log")
2209 85 : hop->htype = hist_log;
2210 142 : hop->tok = t;
2211 142 : expect_op("(");
2212 142 : hop->stat = parse_expression ();
2213 : int64_t tnum;
2214 142 : if (hop->htype == hist_linear)
2215 : {
2216 456 : for (size_t i = 0; i < 3; ++i)
2217 : {
2218 171 : expect_op (",");
2219 171 : expect_number (tnum);
2220 171 : hop->params.push_back (tnum);
2221 : }
2222 : }
2223 142 : expect_op(")");
2224 : }
2225 11016667 : return t;
2226 : }
2227 :
2228 :
2229 : indexable*
2230 19759 : parser::parse_indexable ()
2231 : {
2232 19759 : hist_op *hop = NULL;
2233 19759 : string name;
2234 19759 : const token *tok = parse_hist_op_or_bare_name(hop, name);
2235 19758 : if (hop)
2236 11 : return hop;
2237 : else
2238 : {
2239 19747 : symbol* sym = new symbol;
2240 19747 : sym->name = name;
2241 19747 : sym->tok = tok;
2242 19747 : return sym;
2243 1 : }
2244 : }
2245 :
2246 :
2247 : // var, indexable[index], func(parms), printf("...", ...), $var, $var->member, @stat_op(stat)
2248 : expression*
2249 10996808 : parser::parse_symbol ()
2250 : {
2251 10996808 : hist_op *hop = NULL;
2252 10996808 : symbol *sym = NULL;
2253 10996808 : string name;
2254 10996808 : const token *t = parse_hist_op_or_bare_name(hop, name);
2255 :
2256 10996808 : if (!hop)
2257 : {
2258 : // If we didn't get a hist_op, then we did get an identifier. We can
2259 : // now scrutinize this identifier for the various magic forms of identifier
2260 : // (printf, @stat_op, and $var...)
2261 :
2262 : bool pf_stream, pf_format, pf_delim, pf_newline, pf_char;
2263 :
2264 10996778 : if (name.size() > 0 && name[0] == '@')
2265 : {
2266 243 : stat_op *sop = new stat_op;
2267 486 : if (name == "@avg")
2268 47 : sop->ctype = sc_average;
2269 196 : else if (name == "@count")
2270 83 : sop->ctype = sc_count;
2271 113 : else if (name == "@sum")
2272 46 : sop->ctype = sc_sum;
2273 67 : else if (name == "@min")
2274 33 : sop->ctype = sc_min;
2275 34 : else if (name == "@max")
2276 33 : sop->ctype = sc_max;
2277 : else
2278 1 : throw parse_error("unknown statistic operator " + name);
2279 242 : expect_op("(");
2280 242 : sop->tok = t;
2281 484 : sop->stat = parse_expression ();
2282 242 : expect_op(")");
2283 242 : return sop;
2284 : }
2285 :
2286 10996535 : else if (print_format::parse_print(name,
2287 : pf_stream, pf_format, pf_delim, pf_newline, pf_char))
2288 : {
2289 595271 : print_format *fmt = new print_format;
2290 595271 : fmt->tok = t;
2291 595271 : fmt->print_to_stream = pf_stream;
2292 595271 : fmt->print_with_format = pf_format;
2293 595271 : fmt->print_with_delim = pf_delim;
2294 595271 : fmt->print_with_newline = pf_newline;
2295 595271 : fmt->print_char = pf_char;
2296 :
2297 1190542 : expect_op("(");
2298 1190542 : if ((name == "print" || name == "println") &&
2299 : (peek_kw("@hist_linear") || peek_kw("@hist_log")))
2300 : {
2301 : // We have a special case where we recognize
2302 : // print(@hist_foo(bar)) as a magic print-the-histogram
2303 : // construct. This is sort of gross but it avoids
2304 : // promoting histogram references to typeful
2305 : // expressions.
2306 :
2307 101 : hop = NULL;
2308 101 : t = parse_hist_op_or_bare_name(hop, name);
2309 101 : assert(hop);
2310 :
2311 : // It is, sadly, possible that even while parsing a
2312 : // hist_op, we *mis-guessed* and the user wishes to
2313 : // print(@hist_op(foo)[bucket]), a scalar. In that case
2314 : // we must parse the arrayindex and print an expression.
2315 :
2316 101 : if (!peek_op ("["))
2317 101 : fmt->hist = hop;
2318 : else
2319 : {
2320 : // This is simplified version of the
2321 : // multi-array-index parser below, because we can
2322 : // only ever have one index on a histogram anyways.
2323 0 : expect_op("[");
2324 0 : struct arrayindex* ai = new arrayindex;
2325 0 : ai->tok = t;
2326 0 : ai->base = hop;
2327 0 : ai->indexes.push_back (parse_expression ());
2328 0 : expect_op("]");
2329 0 : fmt->args.push_back(ai);
2330 : }
2331 : }
2332 : else
2333 : {
2334 595170 : int min_args = 0;
2335 595170 : if (fmt->print_with_format)
2336 : {
2337 : // Consume and convert a format string. Agreement between the
2338 : // format string and the arguments is postponed to the
2339 : // typechecking phase.
2340 569404 : string tmp;
2341 569404 : expect_unknown (tok_string, tmp);
2342 569404 : fmt->raw_components = tmp;
2343 569404 : fmt->components = print_format::string_to_components (tmp);
2344 : }
2345 25766 : else if (fmt->print_with_delim)
2346 : {
2347 : // Consume a delimiter to separate arguments.
2348 39 : fmt->delimiter.clear();
2349 39 : fmt->delimiter.type = print_format::conv_literal;
2350 39 : expect_unknown (tok_string, fmt->delimiter.literal_string);
2351 37 : min_args = 2;
2352 : }
2353 : else
2354 : {
2355 : // If we are not printing with a format string, we must have
2356 : // at least one argument (of any type).
2357 25727 : expression *e = parse_expression ();
2358 25727 : fmt->args.push_back(e);
2359 : }
2360 :
2361 : // Consume any subsequent arguments.
2362 2535400 : while (min_args || !peek_op (")"))
2363 : {
2364 1345069 : expect_op(",");
2365 2690129 : expression *e = parse_expression ();
2366 1345064 : fmt->args.push_back(e);
2367 1345064 : if (min_args)
2368 72 : --min_args;
2369 : }
2370 : }
2371 595266 : expect_op(")");
2372 595266 : return fmt;
2373 : }
2374 :
2375 10401264 : else if (name.size() > 0 && name[0] == '$')
2376 : {
2377 : // target_symbol time
2378 2798317 : target_symbol *tsym = new target_symbol;
2379 2798317 : tsym->tok = t;
2380 2798317 : tsym->base_name = name;
2381 618970 : while (true)
2382 : {
2383 3417287 : string c;
2384 3417287 : if (peek_op ("->"))
2385 : {
2386 618960 : next();
2387 618960 : expect_ident_or_keyword (c);
2388 : tsym->components.push_back
2389 618960 : (make_pair (target_symbol::comp_struct_member, c));
2390 : }
2391 2798327 : else if (peek_op ("["))
2392 : {
2393 10 : next();
2394 10 : expect_unknown (tok_number, c);
2395 10 : expect_op ("]");
2396 : tsym->components.push_back
2397 20 : (make_pair (target_symbol::comp_literal_array_index, c));
2398 : }
2399 : else
2400 : break;
2401 : }
2402 2798317 : return tsym;
2403 : }
2404 :
2405 7602947 : else if (peek_op ("(")) // function call
2406 : {
2407 1428436 : next ();
2408 1428436 : struct functioncall* f = new functioncall;
2409 1428436 : f->tok = t;
2410 1428436 : f->function = name;
2411 : // Allow empty actual parameter list
2412 1428436 : if (peek_op (")"))
2413 : {
2414 27941 : next ();
2415 27941 : return f;
2416 : }
2417 267630 : while (1)
2418 : {
2419 1668125 : f->args.push_back (parse_expression ());
2420 1668125 : if (peek_op (")"))
2421 : {
2422 1400495 : next();
2423 : break;
2424 : }
2425 267630 : else if (peek_op (","))
2426 : {
2427 267630 : next();
2428 : continue;
2429 : }
2430 : else
2431 0 : throw parse_error ("expected ',' or ')'");
2432 : }
2433 1400495 : return f;
2434 : }
2435 :
2436 : else
2437 : {
2438 6174511 : sym = new symbol;
2439 6174511 : sym->name = name;
2440 6174511 : sym->tok = t;
2441 : }
2442 : }
2443 :
2444 : // By now, either we had a hist_op in the first place, or else
2445 : // we had a plain word and it was converted to a symbol.
2446 :
2447 6174541 : assert (!hop != !sym); // logical XOR
2448 :
2449 : // All that remains is to check for array indexing
2450 :
2451 6174541 : if (peek_op ("[")) // array
2452 : {
2453 361584 : next ();
2454 361584 : struct arrayindex* ai = new arrayindex;
2455 361584 : ai->tok = t;
2456 :
2457 361584 : if (hop)
2458 30 : ai->base = hop;
2459 : else
2460 361554 : ai->base = sym;
2461 :
2462 244 : while (1)
2463 : {
2464 361828 : ai->indexes.push_back (parse_expression ());
2465 361828 : if (peek_op ("]"))
2466 : {
2467 361584 : next();
2468 : break;
2469 : }
2470 244 : else if (peek_op (","))
2471 : {
2472 244 : next();
2473 : continue;
2474 : }
2475 : else
2476 0 : throw parse_error ("expected ',' or ']'");
2477 : }
2478 361584 : return ai;
2479 : }
2480 :
2481 : // If we got to here, we *should* have a symbol; if we have
2482 : // a hist_op on its own, it doesn't count as an expression,
2483 : // so we throw a parse error.
2484 :
2485 5812957 : if (hop)
2486 0 : throw parse_error("base histogram operator where expression expected", t);
2487 :
2488 5812957 : return sym;
2489 2188 : }
2490 1094 :
|