]> sourceware.org Git - systemtap.git/blame - stapregex-tree.cxx
update NEWS for PR24953
[systemtap.git] / stapregex-tree.cxx
CommitLineData
28a27de2
SM
1// -*- C++ -*-
2// Copyright (C) 2012-2013 Red Hat Inc.
3//
4// This file is part of systemtap, and is free software. You can
5// redistribute it and/or modify it under the terms of the GNU General
6// Public License (GPL); either version 2, or (at your option) any
7// later version.
8//
9// ---
10//
11// This file incorporates code from the re2c project; please see
12// the file README.stapregex for details.
13
14#include <string>
15#include <deque>
1acfc030
JS
16#include <iterator>
17#include <algorithm>
28a27de2
SM
18#include <utility>
19#include <cmath>
179688ce 20#include <cassert>
28a27de2
SM
21
22#include "stapregex-parse.h"
23#include "stapregex-tree.h"
24
25using namespace std;
26
27namespace stapregex {
28
6c9b8d26 29range::range (rchar lb, rchar ub)
28a27de2 30{
248f3856 31 segments.push_back(make_pair(lb,ub));
28a27de2
SM
32}
33
34range::range (const string& str)
35{
36 cursor cur(&str); // no unescaping
37
38 if (cur.finished) return;
39
40 range *ran = stapregex_getrange(cur);
41
42 while (!cur.finished)
43 {
44 range *add = stapregex_getrange(cur);
45 range *new_ran = ( ran != NULL ? range_union(ran, add) : add );
46 delete ran; if (new_ran != add) delete add;
47 ran = new_ran;
48 }
49
50 segments = ran->segments;
51 delete ran;
52}
53
07777235
SM
54void
55range::print (std::ostream& o) const
56{
57 if (segments.empty())
58 {
59 o << "{none}"; // XXX: pick a better pseudo-notation?
60 return;
61 }
62
248f3856 63 if (segments.size() == 1 && segments[0].first == segments[0].second)
07777235 64 {
248f3856 65 print_escaped (o, segments[0].first);
07777235
SM
66 return;
67 }
68
69 o << "[";
70 for (deque<segment>::const_iterator it = segments.begin();
71 it != segments.end(); it++)
72 {
6c9b8d26 73 rchar lb = it->first; rchar ub = it->second;
248f3856
SM
74 if (lb == ub)
75 {
76 print_escaped (o, lb);
77 }
78 else
79 {
80 print_escaped (o, lb);
81 o << "-";
82 print_escaped (o, ub);
83 }
07777235
SM
84 }
85 o << "]";
86}
87
88std::ostream&
89operator << (std::ostream& o, const range& ran)
90{
91 ran.print(o);
92 return o;
93}
94
95std::ostream&
96operator << (std::ostream& o, const range* ran)
97{
98 if (ran)
99 o << *ran;
100 else
101 o << "{none}"; // XXX: pick a better pseudo-notation?
102 return o;
103}
104
105// ------------------------------------------------------------------------
106
28a27de2
SM
107range *
108range_union(range *old_a, range *old_b)
109{
179688ce 110 if (old_a == NULL && old_b == NULL) return NULL;
1acfc030
JS
111 if (old_a == NULL) return new range(*old_b);
112 if (old_b == NULL) return new range(*old_a);
28a27de2
SM
113
114 /* First, gather the segments from both ranges into one sorted pile: */
115 deque<segment> s;
1acfc030
JS
116 merge(old_a->segments.begin(), old_a->segments.end(),
117 old_b->segments.begin(), old_b->segments.end(),
118 inserter(s, s.end()));
28a27de2
SM
119
120 /* Now go through and merge overlapping segments. */
121 range *ran = new range;
122
123 while (!s.empty())
124 {
125 /* Merge adjacent overlapping segments. */
248f3856 126 while (s.size() >= 2 && s[0].second >= s[1].first)
28a27de2
SM
127 {
128 segment merged = make_pair(min(s[0].first, s[1].first),
129 max(s[0].second, s[1].second));
130 s.pop_front(); s.pop_front();
131 s.push_front(merged);
132 }
133
134 /* Place non-overlapping segment in range. */
135 ran->segments.push_back(s.front()); s.pop_front();
136 }
137
138 return ran;
139}
140
141range *
142range_invert(range *old_ran)
143{
144 range ran(*old_ran);
145 range *new_ran = new range;
146
6c9b8d26 147 rchar start = '\1'; // exclude '\0'
28a27de2
SM
148
149 while (!ran.segments.empty()) {
6c9b8d26 150 rchar end = ran.segments.front().first - 1;
28a27de2 151 if (start <= end) new_ran->segments.push_back(make_pair(start, end));
248f3856 152 start = ran.segments.front().second + 1;
28a27de2
SM
153 ran.segments.pop_front();
154 }
155
9d9595d6 156 if ((unsigned)start < (unsigned)NUM_REAL_CHARS)
e5f8ae47 157 new_ran->segments.push_back(make_pair(start, NUM_REAL_CHARS-1));
28a27de2
SM
158
159 return new_ran;
160}
161
162// ------------------------------------------------------------------------
163
20d6e21f
SM
164void
165ins_optimize (ins *i)
166{
167 while (!marked(i))
168 {
169 mark (i); // -- aka "this node has already been optimized"
170
171 if (i->i.tag == CHAR)
172 {
173 i = (ins *) i->i.link; // -- skip node
174 }
175 else if (i->i.tag == GOTO || i->i.tag == FORK)
176 {
177 ins *target = (ins *) i->i.link;
178 ins_optimize(target);
179
180 if (target->i.tag == GOTO)
181 i->i.link = target->i.link == target ? i : target;
182
183 if (i->i.tag == FORK)
184 {
185 ins *follow = (ins *) &i[1];
186 ins_optimize(follow);
187
188 if (follow->i.tag == GOTO && follow->i.link == follow)
189 {
190 i->i.tag = GOTO;
191 }
192 else if (i->i.link == i)
193 {
194 i->i.tag = GOTO;
195 i->i.link = follow;
196 }
197 }
198 }
199 else
200 ++i; // -- skip node
201 }
202}
203
204// ------------------------------------------------------------------------
205
07777235
SM
206const ins*
207show_ins (std::ostream &o, const ins *i, const ins *base)
208{
209 o.width(3); o << (i - base) << ": ";
210
211 const ins *ret = &i[1];
212
213 switch (i->i.tag)
214 {
215 case CHAR:
216 o << "match ";
217 for (; ret < (ins *) i->i.link; ++ret) print_escaped(o, ret->c.value);
218 break;
219
220 case GOTO:
221 o << "goto " << ((ins *) i->i.link - base);
222 break;
223
224 case FORK:
225 o << "fork(" << ( i->i.param ? "prefer" : "avoid" ) << ") "
226 << ((ins *) i->i.link - base);
227 break;
228
229 case ACCEPT:
230 o << "accept(" << i->i.param << ")";
231 break;
232
233 case TAG:
234 o << "tag(" << i->i.param << ")";
235 break;
236
237 case INIT:
238 o << "init";
239 break;
240 }
241
242 return ret;
243}
244
245// ------------------------------------------------------------------------
246
247ins *
248regexp::compile()
249{
248f3856
SM
250 unsigned k = ins_size();
251
252 ins *i = new ins[k + 1];
20d6e21f
SM
253 // XXX Keep Valgrind from complaining in ins_optimize():
254 for (unsigned ix = 0; ix <= k; ix++) i[ix].i.marked = 0;
07777235 255 compile(i);
20d6e21f 256
248f3856
SM
257 // Append an infinite-loop GOTO to avoid edges going outside the array:
258 i[k].i.tag = GOTO;
259 i[k].i.link = &i[k];
260
07777235
SM
261 return i;
262}
263
264std::ostream&
265operator << (std::ostream &o, const regexp& re)
266{
267 re.print (o);
268 return o;
269}
270
271std::ostream&
272operator << (std::ostream &o, const regexp* re)
273{
274 o << *re;
275 return o;
276}
277
278// ------------------------------------------------------------------------
279
280void
281null_op::calc_size()
282{
283 size = 0;
284}
285
286void
dabd71bb 287null_op::compile(ins *)
07777235
SM
288{
289 ;
290}
291
6c9b8d26 292anchor_op::anchor_op(rchar type) : type(type) {}
28a27de2 293
07777235
SM
294void
295anchor_op::calc_size()
296{
297 size = ( type == '^' ? 1 : 2 );
298}
299
300void
301anchor_op::compile(ins *i)
302{
303 if (type == '^')
304 {
305 i->i.tag = INIT;
306 i->i.link = &i[1];
307 }
308 else // type == '$'
309 {
310 i->i.tag = CHAR;
311 i->i.link = &i[2];
312 ins *j = &i[1];
313 j->c.value = '\0';
314 j->c.bump = 1;
315 }
316}
317
28a27de2
SM
318tag_op::tag_op(unsigned id) : id(id) {}
319
07777235
SM
320void
321tag_op::calc_size()
322{
323 size = 1;
324}
325
326void
327tag_op::compile(ins *i)
328{
329 i->i.tag = TAG;
330 i->i.param = id;
331}
332
28a27de2
SM
333match_op::match_op(range *ran) : ran(ran) {}
334
07777235
SM
335void
336match_op::calc_size()
337{
338 size = 1;
339
340 for (deque<segment>::iterator it = ran->segments.begin();
341 it != ran->segments.end(); it++)
342 {
248f3856 343 size += it->second - it->first + 1;
07777235
SM
344 }
345}
346
347void
348match_op::compile(ins *i)
349{
350 unsigned bump = ins_size();
351 i->i.tag = CHAR;
352 i->i.link = &i[bump]; // mark end of table
353
354 ins *j = &i[1];
355 for (deque<segment>::iterator it = ran->segments.begin();
356 it != ran->segments.end(); it++)
357 {
112e9e2b 358 for (unsigned c = it->first; c <= (unsigned) it->second; c++)
07777235
SM
359 {
360 j->c.value = c;
361 j->c.bump = --bump; // mark end of table
362 j++;
363 }
364 }
365}
366
248f3856
SM
367alt_op::alt_op(regexp *a, regexp *b, bool prefer_second)
368 : a(a), b(b), prefer_second(prefer_second) {}
28a27de2 369
07777235
SM
370void
371alt_op::calc_size()
372{
373 size = a->ins_size() + b->ins_size() + 2;
374}
375
376void
377alt_op::compile(ins *i)
378{
379 i->i.tag = FORK;
248f3856 380 i->i.param = prefer_second ? 1 : 0; // preferred alternative to match
07777235
SM
381 ins *j = &i[a->ins_size() + 1];
382 i->i.link = &j[1];
383 a->compile(&i[1]);
384 j->i.tag = GOTO;
385 j->i.link = &j[b->ins_size() + 1];
386 b->compile(&j[1]);
387}
388
28a27de2
SM
389cat_op::cat_op(regexp *a, regexp *b) : a(a), b(b) {}
390
07777235
SM
391void
392cat_op::calc_size()
393{
394 size = a->ins_size() + b->ins_size();
395}
396
397void
398cat_op::compile(ins *i)
399{
400 a->compile(&i[0]);
401 b->compile(&i[a->ins_size()]);
402}
403
248f3856
SM
404close_op::close_op(regexp *re, bool prefer_shorter)
405 : re(re), prefer_shorter(prefer_shorter) {}
28a27de2 406
07777235
SM
407void
408close_op::calc_size()
409{
410 size = re->ins_size() + 1;
411}
412
413void
414close_op::compile(ins *i)
415{
416 re->compile(&i[0]);
417 i += re->ins_size();
418 i->i.tag = FORK;
248f3856 419 i->i.param = prefer_shorter ? 0 : 1; // XXX: match greedily by default
07777235
SM
420 i->i.link = i - re->ins_size();
421}
422
423closev_op::closev_op(regexp *re, int nmin, int nmax)
424 : re(re), nmin(nmin), nmax(nmax) {}
425
426void
427closev_op::calc_size()
428{
429 unsigned k = re->ins_size();
430
431 if (nmax >= 0)
432 size = k * nmin + (nmax - nmin) * (1 + k);
433 else
434 size = k * nmin + 1;
435}
436
437void
438closev_op::compile(ins *i)
439{
440 unsigned k = re->ins_size();
441
442 ins *jumppoint = i + ((nmax - nmin) * (1 + k));
443
444 for (int st = nmin; st < nmax; st++)
445 {
446 i->i.tag = FORK;
447 i->i.param = 0; // XXX: this matches greedily
448 i->i.link = jumppoint;
449 i++;
450 re->compile(&i[0]);
451 i += k;
452 }
453
454 for (int st = 0; st < nmin; st++)
455 {
456 re->compile(&i[0]);
457 i += k;
458
459 if (nmax < 0 && st == 0)
460 {
461 i->i.tag = FORK;
462 i->i.param = 1; // XXX: this matches greedily
463 i->i.link = i - k;
464 i++;
465 }
466 }
467}
468
469rule_op::rule_op(regexp *re, unsigned outcome) : re(re), outcome(outcome) {}
470
471void
472rule_op::calc_size()
473{
474 size = re->ins_size() + 1;
475}
476
477void
478rule_op::compile(ins *i)
479{
480 re->compile(&i[0]);
481 i += re->ins_size();
482 i->i.tag = ACCEPT;
483 i->i.param = outcome;
484}
28a27de2
SM
485
486// ------------------------------------------------------------------------
487
488regexp *
6c9b8d26 489match_char(rchar c)
28a27de2
SM
490{
491 return new match_op(new range(c,c));
492}
493
494regexp *
495str_to_re(const string& str)
496{
497 if (str.empty()) return new null_op;
498
499 regexp *re = match_char(str[0]);
500
501 for (unsigned i = 1; i < str.length(); i++)
502 re = new cat_op(re, match_char(str[i]));
503
504 return re;
505}
506
507regexp *
508do_alt(regexp *a, regexp *b)
509{
510 if (a == NULL) return b;
511 if (b == NULL) return a;
512 return new alt_op(a,b);
513}
514
515regexp *
516make_alt(regexp *a, regexp *b)
517{
518 /* Optimize the case of building alternatives of match_op. */
519 regexp *e1 = NULL, *e2 = NULL;
520 range *r1 = NULL, *r2 = NULL;
521
522 if (a->type_of() == "alt_op")
523 {
524 alt_op *aa = (alt_op *)a;
525 if (aa->a->type_of() == "match_op")
526 {
527 r1 = ((match_op *) aa->a)->ran; e1 = aa->b;
528 }
529 else
530 e1 = a;
531 }
532 else if (a->type_of() == "match_op")
533 {
534 r1 = ((match_op *) a)->ran; e1 = NULL;
535 }
179688ce
SM
536 else
537 e1 = a;
28a27de2
SM
538
539 if (b->type_of() == "alt_op")
540 {
541 alt_op *bb = (alt_op *)b;
542 if (bb->a->type_of() == "match_op")
543 {
544 r2 = ((match_op *) bb->a)->ran; e2 = bb->b;
545 }
546 else
547 e2 = b;
548 }
549 else if (b->type_of() == "match_op")
550 {
551 r2 = ((match_op *) b)->ran; e2 = NULL;
552 }
179688ce
SM
553 else
554 e2 = b;
28a27de2
SM
555
556 range *u = range_union(r1, r2);
557 delete r1; delete r2;
558
559 match_op *m = u != NULL ? new match_op(u) : NULL;
560 regexp *r = do_alt(m, do_alt(e1, e2));
179688ce 561 assert (r != NULL);
28a27de2
SM
562 return r;
563}
564
565regexp *
248f3856 566make_dot(bool allow_zero)
28a27de2 567{
248f3856 568 return new match_op(new range(allow_zero ? 0 : 1, NUM_REAL_CHARS-1));
28a27de2
SM
569}
570
571};
572
573/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.144935 seconds and 5 git commands to generate.