]>
sourceware.org Git - systemtap.git/blob - stapregex-tree.cxx
2 // Copyright (C) 2012-2013 Red Hat Inc.
4 // This file is part of systemtap, and is free software. You can
5 // redistribute it and/or modify it under the terms of the GNU General
6 // Public License (GPL); either version 2, or (at your option) any
11 // This file incorporates code from the re2c project; please see
12 // the file README.stapregex for details.
22 #include "stapregex-parse.h"
23 #include "stapregex-tree.h"
29 range::range (char lb
, char ub
)
31 segments
.push_back(make_pair(lb
,ub
));
34 range::range (const string
& str
)
36 cursor
cur(&str
); // no unescaping
38 if (cur
.finished
) return;
40 range
*ran
= stapregex_getrange(cur
);
44 range
*add
= stapregex_getrange(cur
);
45 range
*new_ran
= ( ran
!= NULL
? range_union(ran
, add
) : add
);
46 delete ran
; if (new_ran
!= add
) delete add
;
50 segments
= ran
->segments
;
55 range::print (std::ostream
& o
) const
59 o
<< "{none}"; // XXX: pick a better pseudo-notation?
63 if (segments
.size() == 1 && segments
[0].first
== segments
[0].second
)
65 print_escaped (o
, segments
[0].first
);
70 for (deque
<segment
>::const_iterator it
= segments
.begin();
71 it
!= segments
.end(); it
++)
73 char lb
= it
->first
; char ub
= it
->second
;
76 print_escaped (o
, lb
);
80 print_escaped (o
, lb
);
82 print_escaped (o
, ub
);
89 operator << (std::ostream
& o
, const range
& ran
)
96 operator << (std::ostream
& o
, const range
* ran
)
101 o
<< "{none}"; // XXX: pick a better pseudo-notation?
105 // ------------------------------------------------------------------------
108 range_union(range
*old_a
, range
*old_b
)
110 if (old_a
== NULL
&& old_b
== NULL
) return NULL
;
111 if (old_a
== NULL
) return new range(*old_b
);
112 if (old_b
== NULL
) return new range(*old_a
);
114 /* First, gather the segments from both ranges into one sorted pile: */
116 merge(old_a
->segments
.begin(), old_a
->segments
.end(),
117 old_b
->segments
.begin(), old_b
->segments
.end(),
118 inserter(s
, s
.end()));
120 /* Now go through and merge overlapping segments. */
121 range
*ran
= new range
;
125 /* Merge adjacent overlapping segments. */
126 while (s
.size() >= 2 && s
[0].second
>= s
[1].first
)
128 segment merged
= make_pair(min(s
[0].first
, s
[1].first
),
129 max(s
[0].second
, s
[1].second
));
130 s
.pop_front(); s
.pop_front();
131 s
.push_front(merged
);
134 /* Place non-overlapping segment in range. */
135 ran
->segments
.push_back(s
.front()); s
.pop_front();
142 range_invert(range
*old_ran
)
145 range
*new_ran
= new range
;
147 char start
= '\1'; // exclude '\0'
149 while (!ran
.segments
.empty()) {
150 char end
= ran
.segments
.front().first
- 1;
151 if (start
<= end
) new_ran
->segments
.push_back(make_pair(start
, end
));
152 start
= ran
.segments
.front().second
+ 1;
153 ran
.segments
.pop_front();
156 if ((unsigned)start
< (unsigned)NUM_REAL_CHARS
)
157 new_ran
->segments
.push_back(make_pair(start
, NUM_REAL_CHARS
-1));
162 // ------------------------------------------------------------------------
165 show_ins (std::ostream
&o
, const ins
*i
, const ins
*base
)
167 o
.width(3); o
<< (i
- base
) << ": ";
169 const ins
*ret
= &i
[1];
175 for (; ret
< (ins
*) i
->i
.link
; ++ret
) print_escaped(o
, ret
->c
.value
);
179 o
<< "goto " << ((ins
*) i
->i
.link
- base
);
183 o
<< "fork(" << ( i
->i
.param
? "prefer" : "avoid" ) << ") "
184 << ((ins
*) i
->i
.link
- base
);
188 o
<< "accept(" << i
->i
.param
<< ")";
192 o
<< "tag(" << i
->i
.param
<< ")";
203 // ------------------------------------------------------------------------
208 unsigned k
= ins_size();
210 ins
*i
= new ins
[k
+ 1];
213 // Append an infinite-loop GOTO to avoid edges going outside the array:
221 operator << (std::ostream
&o
, const regexp
& re
)
228 operator << (std::ostream
&o
, const regexp
* re
)
234 // ------------------------------------------------------------------------
243 null_op::compile(ins
*i
)
248 anchor_op::anchor_op(char type
) : type(type
) {}
251 anchor_op::calc_size()
253 size
= ( type
== '^' ? 1 : 2 );
257 anchor_op::compile(ins
*i
)
274 tag_op::tag_op(unsigned id
) : id(id
) {}
283 tag_op::compile(ins
*i
)
289 match_op::match_op(range
*ran
) : ran(ran
) {}
292 match_op::calc_size()
296 for (deque
<segment
>::iterator it
= ran
->segments
.begin();
297 it
!= ran
->segments
.end(); it
++)
299 size
+= it
->second
- it
->first
+ 1;
304 match_op::compile(ins
*i
)
306 unsigned bump
= ins_size();
308 i
->i
.link
= &i
[bump
]; // mark end of table
311 for (deque
<segment
>::iterator it
= ran
->segments
.begin();
312 it
!= ran
->segments
.end(); it
++)
314 for (unsigned c
= it
->first
; c
<= (unsigned) it
->second
; c
++)
317 j
->c
.bump
= --bump
; // mark end of table
323 alt_op::alt_op(regexp
*a
, regexp
*b
, bool prefer_second
)
324 : a(a
), b(b
), prefer_second(prefer_second
) {}
329 size
= a
->ins_size() + b
->ins_size() + 2;
333 alt_op::compile(ins
*i
)
336 i
->i
.param
= prefer_second
? 1 : 0; // preferred alternative to match
337 ins
*j
= &i
[a
->ins_size() + 1];
341 j
->i
.link
= &j
[b
->ins_size() + 1];
345 cat_op::cat_op(regexp
*a
, regexp
*b
) : a(a
), b(b
) {}
350 size
= a
->ins_size() + b
->ins_size();
354 cat_op::compile(ins
*i
)
357 b
->compile(&i
[a
->ins_size()]);
360 close_op::close_op(regexp
*re
, bool prefer_shorter
)
361 : re(re
), prefer_shorter(prefer_shorter
) {}
364 close_op::calc_size()
366 size
= re
->ins_size() + 1;
370 close_op::compile(ins
*i
)
375 i
->i
.param
= prefer_shorter
? 0 : 1; // XXX: match greedily by default
376 i
->i
.link
= i
- re
->ins_size();
379 closev_op::closev_op(regexp
*re
, int nmin
, int nmax
)
380 : re(re
), nmin(nmin
), nmax(nmax
) {}
383 closev_op::calc_size()
385 unsigned k
= re
->ins_size();
388 size
= k
* nmin
+ (nmax
- nmin
) * (1 + k
);
394 closev_op::compile(ins
*i
)
396 unsigned k
= re
->ins_size();
398 ins
*jumppoint
= i
+ ((nmax
- nmin
) * (1 + k
));
400 for (int st
= nmin
; st
< nmax
; st
++)
403 i
->i
.param
= 0; // XXX: this matches greedily
404 i
->i
.link
= jumppoint
;
410 for (int st
= 0; st
< nmin
; st
++)
415 if (nmax
< 0 && st
== 0)
418 i
->i
.param
= 1; // XXX: this matches greedily
425 rule_op::rule_op(regexp
*re
, unsigned outcome
) : re(re
), outcome(outcome
) {}
430 size
= re
->ins_size() + 1;
434 rule_op::compile(ins
*i
)
439 i
->i
.param
= outcome
;
442 // ------------------------------------------------------------------------
447 return new match_op(new range(c
,c
));
451 str_to_re(const string
& str
)
453 if (str
.empty()) return new null_op
;
455 regexp
*re
= match_char(str
[0]);
457 for (unsigned i
= 1; i
< str
.length(); i
++)
458 re
= new cat_op(re
, match_char(str
[i
]));
464 do_alt(regexp
*a
, regexp
*b
)
466 if (a
== NULL
) return b
;
467 if (b
== NULL
) return a
;
468 return new alt_op(a
,b
);
472 make_alt(regexp
*a
, regexp
*b
)
474 /* Optimize the case of building alternatives of match_op. */
475 regexp
*e1
= NULL
, *e2
= NULL
;
476 range
*r1
= NULL
, *r2
= NULL
;
478 if (a
->type_of() == "alt_op")
480 alt_op
*aa
= (alt_op
*)a
;
481 if (aa
->a
->type_of() == "match_op")
483 r1
= ((match_op
*) aa
->a
)->ran
; e1
= aa
->b
;
488 else if (a
->type_of() == "match_op")
490 r1
= ((match_op
*) a
)->ran
; e1
= NULL
;
495 if (b
->type_of() == "alt_op")
497 alt_op
*bb
= (alt_op
*)b
;
498 if (bb
->a
->type_of() == "match_op")
500 r2
= ((match_op
*) bb
->a
)->ran
; e2
= bb
->b
;
505 else if (b
->type_of() == "match_op")
507 r2
= ((match_op
*) b
)->ran
; e2
= NULL
;
512 range
*u
= range_union(r1
, r2
);
513 delete r1
; delete r2
;
515 match_op
*m
= u
!= NULL
? new match_op(u
) : NULL
;
516 regexp
*r
= do_alt(m
, do_alt(e1
, e2
));
522 make_dot(bool allow_zero
)
524 return new match_op(new range(allow_zero
? 0 : 1, NUM_REAL_CHARS
-1));
529 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.059644 seconds and 5 git commands to generate.