]> sourceware.org Git - systemtap.git/blob - stapregex-tree.cxx
Define __NR_epoll_wait if not defined (PR17462)
[systemtap.git] / stapregex-tree.cxx
1 // -*- C++ -*-
2 // Copyright (C) 2012-2013 Red Hat Inc.
3 //
4 // This file is part of systemtap, and is free software. You can
5 // redistribute it and/or modify it under the terms of the GNU General
6 // Public License (GPL); either version 2, or (at your option) any
7 // later version.
8 //
9 // ---
10 //
11 // This file incorporates code from the re2c project; please see
12 // the file README.stapregex for details.
13
14 #include <string>
15 #include <deque>
16 #include <iterator>
17 #include <algorithm>
18 #include <utility>
19 #include <cmath>
20 #include <cassert>
21
22 #include "stapregex-parse.h"
23 #include "stapregex-tree.h"
24
25 using namespace std;
26
27 namespace stapregex {
28
29 range::range (char lb, char ub)
30 {
31 segments.push_back(make_pair(lb,ub));
32 }
33
34 range::range (const string& str)
35 {
36 cursor cur(&str); // no unescaping
37
38 if (cur.finished) return;
39
40 range *ran = stapregex_getrange(cur);
41
42 while (!cur.finished)
43 {
44 range *add = stapregex_getrange(cur);
45 range *new_ran = ( ran != NULL ? range_union(ran, add) : add );
46 delete ran; if (new_ran != add) delete add;
47 ran = new_ran;
48 }
49
50 segments = ran->segments;
51 delete ran;
52 }
53
54 void
55 range::print (std::ostream& o) const
56 {
57 if (segments.empty())
58 {
59 o << "{none}"; // XXX: pick a better pseudo-notation?
60 return;
61 }
62
63 if (segments.size() == 1 && segments[0].first == segments[0].second)
64 {
65 print_escaped (o, segments[0].first);
66 return;
67 }
68
69 o << "[";
70 for (deque<segment>::const_iterator it = segments.begin();
71 it != segments.end(); it++)
72 {
73 char lb = it->first; char ub = it->second;
74 if (lb == ub)
75 {
76 print_escaped (o, lb);
77 }
78 else
79 {
80 print_escaped (o, lb);
81 o << "-";
82 print_escaped (o, ub);
83 }
84 }
85 o << "]";
86 }
87
88 std::ostream&
89 operator << (std::ostream& o, const range& ran)
90 {
91 ran.print(o);
92 return o;
93 }
94
95 std::ostream&
96 operator << (std::ostream& o, const range* ran)
97 {
98 if (ran)
99 o << *ran;
100 else
101 o << "{none}"; // XXX: pick a better pseudo-notation?
102 return o;
103 }
104
105 // ------------------------------------------------------------------------
106
107 range *
108 range_union(range *old_a, range *old_b)
109 {
110 if (old_a == NULL && old_b == NULL) return NULL;
111 if (old_a == NULL) return new range(*old_b);
112 if (old_b == NULL) return new range(*old_a);
113
114 /* First, gather the segments from both ranges into one sorted pile: */
115 deque<segment> s;
116 merge(old_a->segments.begin(), old_a->segments.end(),
117 old_b->segments.begin(), old_b->segments.end(),
118 inserter(s, s.end()));
119
120 /* Now go through and merge overlapping segments. */
121 range *ran = new range;
122
123 while (!s.empty())
124 {
125 /* Merge adjacent overlapping segments. */
126 while (s.size() >= 2 && s[0].second >= s[1].first)
127 {
128 segment merged = make_pair(min(s[0].first, s[1].first),
129 max(s[0].second, s[1].second));
130 s.pop_front(); s.pop_front();
131 s.push_front(merged);
132 }
133
134 /* Place non-overlapping segment in range. */
135 ran->segments.push_back(s.front()); s.pop_front();
136 }
137
138 return ran;
139 }
140
141 range *
142 range_invert(range *old_ran)
143 {
144 range ran(*old_ran);
145 range *new_ran = new range;
146
147 char start = '\1'; // exclude '\0'
148
149 while (!ran.segments.empty()) {
150 char end = ran.segments.front().first - 1;
151 if (start <= end) new_ran->segments.push_back(make_pair(start, end));
152 start = ran.segments.front().second + 1;
153 ran.segments.pop_front();
154 }
155
156 if ((unsigned)start < (unsigned)NUM_REAL_CHARS)
157 new_ran->segments.push_back(make_pair(start, NUM_REAL_CHARS-1));
158
159 return new_ran;
160 }
161
162 // ------------------------------------------------------------------------
163
164 const ins*
165 show_ins (std::ostream &o, const ins *i, const ins *base)
166 {
167 o.width(3); o << (i - base) << ": ";
168
169 const ins *ret = &i[1];
170
171 switch (i->i.tag)
172 {
173 case CHAR:
174 o << "match ";
175 for (; ret < (ins *) i->i.link; ++ret) print_escaped(o, ret->c.value);
176 break;
177
178 case GOTO:
179 o << "goto " << ((ins *) i->i.link - base);
180 break;
181
182 case FORK:
183 o << "fork(" << ( i->i.param ? "prefer" : "avoid" ) << ") "
184 << ((ins *) i->i.link - base);
185 break;
186
187 case ACCEPT:
188 o << "accept(" << i->i.param << ")";
189 break;
190
191 case TAG:
192 o << "tag(" << i->i.param << ")";
193 break;
194
195 case INIT:
196 o << "init";
197 break;
198 }
199
200 return ret;
201 }
202
203 // ------------------------------------------------------------------------
204
205 ins *
206 regexp::compile()
207 {
208 unsigned k = ins_size();
209
210 ins *i = new ins[k + 1];
211 compile(i);
212
213 // Append an infinite-loop GOTO to avoid edges going outside the array:
214 i[k].i.tag = GOTO;
215 i[k].i.link = &i[k];
216
217 return i;
218 }
219
220 std::ostream&
221 operator << (std::ostream &o, const regexp& re)
222 {
223 re.print (o);
224 return o;
225 }
226
227 std::ostream&
228 operator << (std::ostream &o, const regexp* re)
229 {
230 o << *re;
231 return o;
232 }
233
234 // ------------------------------------------------------------------------
235
236 void
237 null_op::calc_size()
238 {
239 size = 0;
240 }
241
242 void
243 null_op::compile(ins *i)
244 {
245 ;
246 }
247
248 anchor_op::anchor_op(char type) : type(type) {}
249
250 void
251 anchor_op::calc_size()
252 {
253 size = ( type == '^' ? 1 : 2 );
254 }
255
256 void
257 anchor_op::compile(ins *i)
258 {
259 if (type == '^')
260 {
261 i->i.tag = INIT;
262 i->i.link = &i[1];
263 }
264 else // type == '$'
265 {
266 i->i.tag = CHAR;
267 i->i.link = &i[2];
268 ins *j = &i[1];
269 j->c.value = '\0';
270 j->c.bump = 1;
271 }
272 }
273
274 tag_op::tag_op(unsigned id) : id(id) {}
275
276 void
277 tag_op::calc_size()
278 {
279 size = 1;
280 }
281
282 void
283 tag_op::compile(ins *i)
284 {
285 i->i.tag = TAG;
286 i->i.param = id;
287 }
288
289 match_op::match_op(range *ran) : ran(ran) {}
290
291 void
292 match_op::calc_size()
293 {
294 size = 1;
295
296 for (deque<segment>::iterator it = ran->segments.begin();
297 it != ran->segments.end(); it++)
298 {
299 size += it->second - it->first + 1;
300 }
301 }
302
303 void
304 match_op::compile(ins *i)
305 {
306 unsigned bump = ins_size();
307 i->i.tag = CHAR;
308 i->i.link = &i[bump]; // mark end of table
309
310 ins *j = &i[1];
311 for (deque<segment>::iterator it = ran->segments.begin();
312 it != ran->segments.end(); it++)
313 {
314 for (unsigned c = it->first; c <= (unsigned) it->second; c++)
315 {
316 j->c.value = c;
317 j->c.bump = --bump; // mark end of table
318 j++;
319 }
320 }
321 }
322
323 alt_op::alt_op(regexp *a, regexp *b, bool prefer_second)
324 : a(a), b(b), prefer_second(prefer_second) {}
325
326 void
327 alt_op::calc_size()
328 {
329 size = a->ins_size() + b->ins_size() + 2;
330 }
331
332 void
333 alt_op::compile(ins *i)
334 {
335 i->i.tag = FORK;
336 i->i.param = prefer_second ? 1 : 0; // preferred alternative to match
337 ins *j = &i[a->ins_size() + 1];
338 i->i.link = &j[1];
339 a->compile(&i[1]);
340 j->i.tag = GOTO;
341 j->i.link = &j[b->ins_size() + 1];
342 b->compile(&j[1]);
343 }
344
345 cat_op::cat_op(regexp *a, regexp *b) : a(a), b(b) {}
346
347 void
348 cat_op::calc_size()
349 {
350 size = a->ins_size() + b->ins_size();
351 }
352
353 void
354 cat_op::compile(ins *i)
355 {
356 a->compile(&i[0]);
357 b->compile(&i[a->ins_size()]);
358 }
359
360 close_op::close_op(regexp *re, bool prefer_shorter)
361 : re(re), prefer_shorter(prefer_shorter) {}
362
363 void
364 close_op::calc_size()
365 {
366 size = re->ins_size() + 1;
367 }
368
369 void
370 close_op::compile(ins *i)
371 {
372 re->compile(&i[0]);
373 i += re->ins_size();
374 i->i.tag = FORK;
375 i->i.param = prefer_shorter ? 0 : 1; // XXX: match greedily by default
376 i->i.link = i - re->ins_size();
377 }
378
379 closev_op::closev_op(regexp *re, int nmin, int nmax)
380 : re(re), nmin(nmin), nmax(nmax) {}
381
382 void
383 closev_op::calc_size()
384 {
385 unsigned k = re->ins_size();
386
387 if (nmax >= 0)
388 size = k * nmin + (nmax - nmin) * (1 + k);
389 else
390 size = k * nmin + 1;
391 }
392
393 void
394 closev_op::compile(ins *i)
395 {
396 unsigned k = re->ins_size();
397
398 ins *jumppoint = i + ((nmax - nmin) * (1 + k));
399
400 for (int st = nmin; st < nmax; st++)
401 {
402 i->i.tag = FORK;
403 i->i.param = 0; // XXX: this matches greedily
404 i->i.link = jumppoint;
405 i++;
406 re->compile(&i[0]);
407 i += k;
408 }
409
410 for (int st = 0; st < nmin; st++)
411 {
412 re->compile(&i[0]);
413 i += k;
414
415 if (nmax < 0 && st == 0)
416 {
417 i->i.tag = FORK;
418 i->i.param = 1; // XXX: this matches greedily
419 i->i.link = i - k;
420 i++;
421 }
422 }
423 }
424
425 rule_op::rule_op(regexp *re, unsigned outcome) : re(re), outcome(outcome) {}
426
427 void
428 rule_op::calc_size()
429 {
430 size = re->ins_size() + 1;
431 }
432
433 void
434 rule_op::compile(ins *i)
435 {
436 re->compile(&i[0]);
437 i += re->ins_size();
438 i->i.tag = ACCEPT;
439 i->i.param = outcome;
440 }
441
442 // ------------------------------------------------------------------------
443
444 regexp *
445 match_char(char c)
446 {
447 return new match_op(new range(c,c));
448 }
449
450 regexp *
451 str_to_re(const string& str)
452 {
453 if (str.empty()) return new null_op;
454
455 regexp *re = match_char(str[0]);
456
457 for (unsigned i = 1; i < str.length(); i++)
458 re = new cat_op(re, match_char(str[i]));
459
460 return re;
461 }
462
463 regexp *
464 do_alt(regexp *a, regexp *b)
465 {
466 if (a == NULL) return b;
467 if (b == NULL) return a;
468 return new alt_op(a,b);
469 }
470
471 regexp *
472 make_alt(regexp *a, regexp *b)
473 {
474 /* Optimize the case of building alternatives of match_op. */
475 regexp *e1 = NULL, *e2 = NULL;
476 range *r1 = NULL, *r2 = NULL;
477
478 if (a->type_of() == "alt_op")
479 {
480 alt_op *aa = (alt_op *)a;
481 if (aa->a->type_of() == "match_op")
482 {
483 r1 = ((match_op *) aa->a)->ran; e1 = aa->b;
484 }
485 else
486 e1 = a;
487 }
488 else if (a->type_of() == "match_op")
489 {
490 r1 = ((match_op *) a)->ran; e1 = NULL;
491 }
492 else
493 e1 = a;
494
495 if (b->type_of() == "alt_op")
496 {
497 alt_op *bb = (alt_op *)b;
498 if (bb->a->type_of() == "match_op")
499 {
500 r2 = ((match_op *) bb->a)->ran; e2 = bb->b;
501 }
502 else
503 e2 = b;
504 }
505 else if (b->type_of() == "match_op")
506 {
507 r2 = ((match_op *) b)->ran; e2 = NULL;
508 }
509 else
510 e2 = b;
511
512 range *u = range_union(r1, r2);
513 delete r1; delete r2;
514
515 match_op *m = u != NULL ? new match_op(u) : NULL;
516 regexp *r = do_alt(m, do_alt(e1, e2));
517 assert (r != NULL);
518 return r;
519 }
520
521 regexp *
522 make_dot(bool allow_zero)
523 {
524 return new match_op(new range(allow_zero ? 0 : 1, NUM_REAL_CHARS-1));
525 }
526
527 };
528
529 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.059644 seconds and 5 git commands to generate.