]> sourceware.org Git - systemtap.git/blob - stapregex-dfa.h
Use std::thread for threading and CPU counts
[systemtap.git] / stapregex-dfa.h
1 // -*- C++ -*-
2 // Copyright (C) 2012-2013 Red Hat Inc.
3 //
4 // This file is part of systemtap, and is free software. You can
5 // redistribute it and/or modify it under the terms of the GNU General
6 // Public License (GPL); either version 2, or (at your option) any
7 // later version.
8 //
9 // ---
10 //
11 // This file incorporates code from the re2c project; please see
12 // the file README.stapregex for details.
13
14 #ifndef STAPREGEX_DFA_H
15 #define STAPREGEX_DFA_H
16
17 #include <string>
18 #include <iostream>
19 #include <set>
20 #include <list>
21 #include <vector>
22 #include <queue>
23 #include <utility>
24
25 class translator_output; /* from translator-output.h */
26
27 namespace stapregex {
28
29 struct regexp; /* from stapregex-tree.h */
30 union ins; /* from stapregex-tree.h */
31
32 struct dfa;
33 struct state;
34
35 /* Coordinates of a subexpression map item m[t,s]: */
36 typedef std::pair<unsigned, unsigned> map_item;
37
38 /* A tagged DFA transition can have a number of these instructions
39 attached, which are able to assign the current position to specific
40 map items, or to reorder the existing elements of the map: */
41 struct tdfa_insn {
42 map_item to, from;
43 bool save_pos; // -- if true, assign position; if false, copy from's value
44 };
45 typedef std::list<tdfa_insn> tdfa_action;
46
47 std::ostream& operator << (std::ostream &o, const tdfa_action& a);
48
49 /* The arc_priority data type is a cunning way to represent transition
50 priorities, necessitated by the fact that we have FORK opcodes (two
51 outgoing e-transitions) which can lead to further FORK opcodes, &c,
52 requiring a binary-subdivision style of priority assignment:
53
54 -> 3/4 ... and so forth
55 /
56 -> 1/2 --> 2/4 ... and so forth
57 /
58 / ---> 1/4 ... and so forth
59 / /
60 0 ----> 0 ----> 0 ... and so forth
61
62 Our trick is pretty much just to allocate the possible values of an
63 unsigned long in binary-search fashion.
64
65 XXX: For a 64-bit unsigned long type, this allows a chain of FORK
66 opcodes around 64 units long (without intervening CHAR match
67 insns), at which point things start to get funky. Be sure to keep
68 an eye on whether this turns out to be enough in practice. */
69 typedef std::pair<unsigned long, unsigned> arc_priority;
70 arc_priority refine_higher(const arc_priority& a);
71 arc_priority refine_lower(const arc_priority& a);
72 int arc_compare(const arc_priority& a, const arc_priority& b);
73
74 std::ostream& operator << (std::ostream &o, const arc_priority& p);
75
76 /* When constructing tagged DFA sets from ins, we need to keep track
77 of a set of instructions together with further bookkeeping
78 information (relative preference/priority, map items affected). */
79 struct kernel_point {
80 ins *i;
81 arc_priority priority; // -- used in tagged e-closure computation
82 std::list<map_item> map_items;
83 std::set<ins *> parents; // -- used for infinite-loop-detection
84 };
85 typedef std::list<kernel_point> state_kernel;
86
87 /* Corresponds to a tagged-DFA transition arc, complete with
88 subexpression map reordering and such. */
89 struct span {
90 char lb, ub; // -- segment [lb, ub]
91 state *to;
92 tdfa_action action;
93 state_kernel *reach_pairs; // -- for the subset-construction
94 // -- algorithm
95
96 void emit_jump (translator_output *o, const dfa *d) const;
97 void emit_final (translator_output *o, const dfa *d) const;
98 };
99
100 struct state {
101 unsigned label; // -- index of state in dfa
102 state *next; // -- store dfa states as a linked list
103 state_kernel *kernel; // -- set of corresponding ins coordinates
104 /* NB: our usage of the term 'kernel' differs from re2c's slightly
105 -- there is no real need to distinguish NFA edges inside the
106 state from outgoing edges, (XXX) as far as I am aware. */
107
108 bool accepts; // -- is this a final state?
109 unsigned accept_outcome;
110 tdfa_action finalizer; // -- run after accepting
111
112 std::list<span> spans;
113
114 state (state_kernel *kernel);
115
116 void emit (translator_output *o, const dfa *d) const;
117
118 void print (translator_output *o) const;
119 };
120
121 // ------------------------------------------------------------------------
122
123 struct dfa {
124 ins *orig_nfa;
125 state *first, *last; // -- store dfa states as a linked list
126 unsigned nstates;
127
128 // Infrastructure to deal with tagging:
129 unsigned ntags;
130 tdfa_action initializer; // -- run before entering start state
131 std::vector<std::string> outcome_snippets;
132
133 dfa (ins *i, int ntags, std::vector<std::string>& outcome_snippets);
134 ~dfa ();
135
136 void emit (translator_output *o) const;
137 void emit_tagsave (translator_output *o, std::string tag_states,
138 std::string tag_vals, std::string tag_count) const;
139
140 void print (translator_output *o) const;
141 void print (std::ostream& o) const;
142
143 private:
144 state *add_state (state* s);
145 state *find_equivalent (state *s, tdfa_action &r);
146 };
147
148 std::ostream& operator << (std::ostream &o, const dfa& d);
149 std::ostream& operator << (std::ostream &o, const dfa* d);
150
151 /* Produces a dfa that runs the specified code snippets based on match
152 or fail outcomes for an unanchored (by default) match of re. */
153 dfa *stapregex_compile (regexp *re, const std::string& match_snippet, const std::string& fail_snippet);
154
155 };
156
157 #endif
158
159 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.042894 seconds and 5 git commands to generate.