]> sourceware.org Git - systemtap.git/blame - stapregex-tree.cxx
Fixed systemtap-server.exp
[systemtap.git] / stapregex-tree.cxx
CommitLineData
28a27de2
SM
1// -*- C++ -*-
2// Copyright (C) 2012-2013 Red Hat Inc.
3//
4// This file is part of systemtap, and is free software. You can
5// redistribute it and/or modify it under the terms of the GNU General
6// Public License (GPL); either version 2, or (at your option) any
7// later version.
8//
9// ---
10//
11// This file incorporates code from the re2c project; please see
12// the file README.stapregex for details.
13
14#include <string>
15#include <deque>
1acfc030
JS
16#include <iterator>
17#include <algorithm>
28a27de2
SM
18#include <utility>
19#include <cmath>
179688ce 20#include <cassert>
28a27de2
SM
21
22#include "stapregex-parse.h"
23#include "stapregex-tree.h"
24
25using namespace std;
26
27namespace stapregex {
28
248f3856 29range::range (char lb, char ub)
28a27de2 30{
248f3856 31 segments.push_back(make_pair(lb,ub));
28a27de2
SM
32}
33
34range::range (const string& str)
35{
36 cursor cur(&str); // no unescaping
37
38 if (cur.finished) return;
39
40 range *ran = stapregex_getrange(cur);
41
42 while (!cur.finished)
43 {
44 range *add = stapregex_getrange(cur);
45 range *new_ran = ( ran != NULL ? range_union(ran, add) : add );
46 delete ran; if (new_ran != add) delete add;
47 ran = new_ran;
48 }
49
50 segments = ran->segments;
51 delete ran;
52}
53
07777235
SM
54void
55range::print (std::ostream& o) const
56{
57 if (segments.empty())
58 {
59 o << "{none}"; // XXX: pick a better pseudo-notation?
60 return;
61 }
62
248f3856 63 if (segments.size() == 1 && segments[0].first == segments[0].second)
07777235 64 {
248f3856 65 print_escaped (o, segments[0].first);
07777235
SM
66 return;
67 }
68
69 o << "[";
70 for (deque<segment>::const_iterator it = segments.begin();
71 it != segments.end(); it++)
72 {
73 char lb = it->first; char ub = it->second;
248f3856
SM
74 if (lb == ub)
75 {
76 print_escaped (o, lb);
77 }
78 else
79 {
80 print_escaped (o, lb);
81 o << "-";
82 print_escaped (o, ub);
83 }
07777235
SM
84 }
85 o << "]";
86}
87
88std::ostream&
89operator << (std::ostream& o, const range& ran)
90{
91 ran.print(o);
92 return o;
93}
94
95std::ostream&
96operator << (std::ostream& o, const range* ran)
97{
98 if (ran)
99 o << *ran;
100 else
101 o << "{none}"; // XXX: pick a better pseudo-notation?
102 return o;
103}
104
105// ------------------------------------------------------------------------
106
28a27de2
SM
107range *
108range_union(range *old_a, range *old_b)
109{
179688ce 110 if (old_a == NULL && old_b == NULL) return NULL;
1acfc030
JS
111 if (old_a == NULL) return new range(*old_b);
112 if (old_b == NULL) return new range(*old_a);
28a27de2
SM
113
114 /* First, gather the segments from both ranges into one sorted pile: */
115 deque<segment> s;
1acfc030
JS
116 merge(old_a->segments.begin(), old_a->segments.end(),
117 old_b->segments.begin(), old_b->segments.end(),
118 inserter(s, s.end()));
28a27de2
SM
119
120 /* Now go through and merge overlapping segments. */
121 range *ran = new range;
122
123 while (!s.empty())
124 {
125 /* Merge adjacent overlapping segments. */
248f3856 126 while (s.size() >= 2 && s[0].second >= s[1].first)
28a27de2
SM
127 {
128 segment merged = make_pair(min(s[0].first, s[1].first),
129 max(s[0].second, s[1].second));
130 s.pop_front(); s.pop_front();
131 s.push_front(merged);
132 }
133
134 /* Place non-overlapping segment in range. */
135 ran->segments.push_back(s.front()); s.pop_front();
136 }
137
138 return ran;
139}
140
141range *
142range_invert(range *old_ran)
143{
144 range ran(*old_ran);
145 range *new_ran = new range;
146
e5f8ae47 147 char start = '\1'; // exclude '\0'
28a27de2
SM
148
149 while (!ran.segments.empty()) {
e5f8ae47 150 char end = ran.segments.front().first - 1;
28a27de2 151 if (start <= end) new_ran->segments.push_back(make_pair(start, end));
248f3856 152 start = ran.segments.front().second + 1;
28a27de2
SM
153 ran.segments.pop_front();
154 }
155
9d9595d6 156 if ((unsigned)start < (unsigned)NUM_REAL_CHARS)
e5f8ae47 157 new_ran->segments.push_back(make_pair(start, NUM_REAL_CHARS-1));
28a27de2
SM
158
159 return new_ran;
160}
161
162// ------------------------------------------------------------------------
163
07777235
SM
164const ins*
165show_ins (std::ostream &o, const ins *i, const ins *base)
166{
167 o.width(3); o << (i - base) << ": ";
168
169 const ins *ret = &i[1];
170
171 switch (i->i.tag)
172 {
173 case CHAR:
174 o << "match ";
175 for (; ret < (ins *) i->i.link; ++ret) print_escaped(o, ret->c.value);
176 break;
177
178 case GOTO:
179 o << "goto " << ((ins *) i->i.link - base);
180 break;
181
182 case FORK:
183 o << "fork(" << ( i->i.param ? "prefer" : "avoid" ) << ") "
184 << ((ins *) i->i.link - base);
185 break;
186
187 case ACCEPT:
188 o << "accept(" << i->i.param << ")";
189 break;
190
191 case TAG:
192 o << "tag(" << i->i.param << ")";
193 break;
194
195 case INIT:
196 o << "init";
197 break;
198 }
199
200 return ret;
201}
202
203// ------------------------------------------------------------------------
204
205ins *
206regexp::compile()
207{
248f3856
SM
208 unsigned k = ins_size();
209
210 ins *i = new ins[k + 1];
07777235 211 compile(i);
248f3856
SM
212
213 // Append an infinite-loop GOTO to avoid edges going outside the array:
214 i[k].i.tag = GOTO;
215 i[k].i.link = &i[k];
216
07777235
SM
217 return i;
218}
219
220std::ostream&
221operator << (std::ostream &o, const regexp& re)
222{
223 re.print (o);
224 return o;
225}
226
227std::ostream&
228operator << (std::ostream &o, const regexp* re)
229{
230 o << *re;
231 return o;
232}
233
234// ------------------------------------------------------------------------
235
236void
237null_op::calc_size()
238{
239 size = 0;
240}
241
242void
243null_op::compile(ins *i)
244{
245 ;
246}
247
28a27de2
SM
248anchor_op::anchor_op(char type) : type(type) {}
249
07777235
SM
250void
251anchor_op::calc_size()
252{
253 size = ( type == '^' ? 1 : 2 );
254}
255
256void
257anchor_op::compile(ins *i)
258{
259 if (type == '^')
260 {
261 i->i.tag = INIT;
262 i->i.link = &i[1];
263 }
264 else // type == '$'
265 {
266 i->i.tag = CHAR;
267 i->i.link = &i[2];
268 ins *j = &i[1];
269 j->c.value = '\0';
270 j->c.bump = 1;
271 }
272}
273
28a27de2
SM
274tag_op::tag_op(unsigned id) : id(id) {}
275
07777235
SM
276void
277tag_op::calc_size()
278{
279 size = 1;
280}
281
282void
283tag_op::compile(ins *i)
284{
285 i->i.tag = TAG;
286 i->i.param = id;
287}
288
28a27de2
SM
289match_op::match_op(range *ran) : ran(ran) {}
290
07777235
SM
291void
292match_op::calc_size()
293{
294 size = 1;
295
296 for (deque<segment>::iterator it = ran->segments.begin();
297 it != ran->segments.end(); it++)
298 {
248f3856 299 size += it->second - it->first + 1;
07777235
SM
300 }
301}
302
303void
304match_op::compile(ins *i)
305{
306 unsigned bump = ins_size();
307 i->i.tag = CHAR;
308 i->i.link = &i[bump]; // mark end of table
309
310 ins *j = &i[1];
311 for (deque<segment>::iterator it = ran->segments.begin();
312 it != ran->segments.end(); it++)
313 {
112e9e2b 314 for (unsigned c = it->first; c <= (unsigned) it->second; c++)
07777235
SM
315 {
316 j->c.value = c;
317 j->c.bump = --bump; // mark end of table
318 j++;
319 }
320 }
321}
322
248f3856
SM
323alt_op::alt_op(regexp *a, regexp *b, bool prefer_second)
324 : a(a), b(b), prefer_second(prefer_second) {}
28a27de2 325
07777235
SM
326void
327alt_op::calc_size()
328{
329 size = a->ins_size() + b->ins_size() + 2;
330}
331
332void
333alt_op::compile(ins *i)
334{
335 i->i.tag = FORK;
248f3856 336 i->i.param = prefer_second ? 1 : 0; // preferred alternative to match
07777235
SM
337 ins *j = &i[a->ins_size() + 1];
338 i->i.link = &j[1];
339 a->compile(&i[1]);
340 j->i.tag = GOTO;
341 j->i.link = &j[b->ins_size() + 1];
342 b->compile(&j[1]);
343}
344
28a27de2
SM
345cat_op::cat_op(regexp *a, regexp *b) : a(a), b(b) {}
346
07777235
SM
347void
348cat_op::calc_size()
349{
350 size = a->ins_size() + b->ins_size();
351}
352
353void
354cat_op::compile(ins *i)
355{
356 a->compile(&i[0]);
357 b->compile(&i[a->ins_size()]);
358}
359
248f3856
SM
360close_op::close_op(regexp *re, bool prefer_shorter)
361 : re(re), prefer_shorter(prefer_shorter) {}
28a27de2 362
07777235
SM
363void
364close_op::calc_size()
365{
366 size = re->ins_size() + 1;
367}
368
369void
370close_op::compile(ins *i)
371{
372 re->compile(&i[0]);
373 i += re->ins_size();
374 i->i.tag = FORK;
248f3856 375 i->i.param = prefer_shorter ? 0 : 1; // XXX: match greedily by default
07777235
SM
376 i->i.link = i - re->ins_size();
377}
378
379closev_op::closev_op(regexp *re, int nmin, int nmax)
380 : re(re), nmin(nmin), nmax(nmax) {}
381
382void
383closev_op::calc_size()
384{
385 unsigned k = re->ins_size();
386
387 if (nmax >= 0)
388 size = k * nmin + (nmax - nmin) * (1 + k);
389 else
390 size = k * nmin + 1;
391}
392
393void
394closev_op::compile(ins *i)
395{
396 unsigned k = re->ins_size();
397
398 ins *jumppoint = i + ((nmax - nmin) * (1 + k));
399
400 for (int st = nmin; st < nmax; st++)
401 {
402 i->i.tag = FORK;
403 i->i.param = 0; // XXX: this matches greedily
404 i->i.link = jumppoint;
405 i++;
406 re->compile(&i[0]);
407 i += k;
408 }
409
410 for (int st = 0; st < nmin; st++)
411 {
412 re->compile(&i[0]);
413 i += k;
414
415 if (nmax < 0 && st == 0)
416 {
417 i->i.tag = FORK;
418 i->i.param = 1; // XXX: this matches greedily
419 i->i.link = i - k;
420 i++;
421 }
422 }
423}
424
425rule_op::rule_op(regexp *re, unsigned outcome) : re(re), outcome(outcome) {}
426
427void
428rule_op::calc_size()
429{
430 size = re->ins_size() + 1;
431}
432
433void
434rule_op::compile(ins *i)
435{
436 re->compile(&i[0]);
437 i += re->ins_size();
438 i->i.tag = ACCEPT;
439 i->i.param = outcome;
440}
28a27de2
SM
441
442// ------------------------------------------------------------------------
443
444regexp *
445match_char(char c)
446{
447 return new match_op(new range(c,c));
448}
449
450regexp *
451str_to_re(const string& str)
452{
453 if (str.empty()) return new null_op;
454
455 regexp *re = match_char(str[0]);
456
457 for (unsigned i = 1; i < str.length(); i++)
458 re = new cat_op(re, match_char(str[i]));
459
460 return re;
461}
462
463regexp *
464do_alt(regexp *a, regexp *b)
465{
466 if (a == NULL) return b;
467 if (b == NULL) return a;
468 return new alt_op(a,b);
469}
470
471regexp *
472make_alt(regexp *a, regexp *b)
473{
474 /* Optimize the case of building alternatives of match_op. */
475 regexp *e1 = NULL, *e2 = NULL;
476 range *r1 = NULL, *r2 = NULL;
477
478 if (a->type_of() == "alt_op")
479 {
480 alt_op *aa = (alt_op *)a;
481 if (aa->a->type_of() == "match_op")
482 {
483 r1 = ((match_op *) aa->a)->ran; e1 = aa->b;
484 }
485 else
486 e1 = a;
487 }
488 else if (a->type_of() == "match_op")
489 {
490 r1 = ((match_op *) a)->ran; e1 = NULL;
491 }
179688ce
SM
492 else
493 e1 = a;
28a27de2
SM
494
495 if (b->type_of() == "alt_op")
496 {
497 alt_op *bb = (alt_op *)b;
498 if (bb->a->type_of() == "match_op")
499 {
500 r2 = ((match_op *) bb->a)->ran; e2 = bb->b;
501 }
502 else
503 e2 = b;
504 }
505 else if (b->type_of() == "match_op")
506 {
507 r2 = ((match_op *) b)->ran; e2 = NULL;
508 }
179688ce
SM
509 else
510 e2 = b;
28a27de2
SM
511
512 range *u = range_union(r1, r2);
513 delete r1; delete r2;
514
515 match_op *m = u != NULL ? new match_op(u) : NULL;
516 regexp *r = do_alt(m, do_alt(e1, e2));
179688ce 517 assert (r != NULL);
28a27de2
SM
518 return r;
519}
520
521regexp *
248f3856 522make_dot(bool allow_zero)
28a27de2 523{
248f3856 524 return new match_op(new range(allow_zero ? 0 : 1, NUM_REAL_CHARS-1));
28a27de2
SM
525}
526
527};
528
529/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.090059 seconds and 5 git commands to generate.