]> sourceware.org Git - systemtap.git/blob - stapregex-dfa.h
stapbpf PR24543 oops in previous commit (mark_active_cpus)
[systemtap.git] / stapregex-dfa.h
1 // -*- C++ -*-
2 // Copyright (C) 2012-2018 Red Hat Inc.
3 //
4 // This file is part of systemtap, and is free software. You can
5 // redistribute it and/or modify it under the terms of the GNU General
6 // Public License (GPL); either version 2, or (at your option) any
7 // later version.
8 //
9 // ---
10 //
11 // This file incorporates code from the re2c project; please see
12 // the file README.stapregex for details.
13
14 #ifndef STAPREGEX_DFA_H
15 #define STAPREGEX_DFA_H
16
17 #include <string>
18 #include <iostream>
19 #include <set>
20 #include <list>
21 #include <vector>
22 #include <queue>
23 #include <utility>
24
25 #include "stapregex-defines.h"
26
27 class translator_output; /* from translator-output.h */
28
29 namespace stapregex {
30
31 struct regexp; /* from stapregex-tree.h */
32 union ins; /* from stapregex-tree.h */
33
34 struct dfa;
35 struct state;
36
37 /* Coordinates of a subexpression map item m[t,s]: */
38 typedef std::pair<unsigned, unsigned> map_item;
39
40 std::ostream& operator << (std::ostream &o, const map_item& m);
41
42 /* A tagged DFA transition can have a number of these instructions
43 attached, which are able to assign the current position to specific
44 map items, or to reorder the existing elements of the map: */
45 struct tdfa_insn {
46 map_item to, from;
47 bool save_tag; // -- if true, copy from's value to final tag
48 bool save_pos; // -- if true, assign cur position; else, copy from's value
49 };
50 typedef std::list<tdfa_insn> tdfa_action;
51
52 std::ostream& operator << (std::ostream &o, const tdfa_action& a);
53
54 /* The arc_priority data type is a cunning way to represent transition
55 priorities, necessitated by the fact that we have FORK opcodes (two
56 outgoing e-transitions) which can lead to further FORK opcodes, &c,
57 requiring a binary-subdivision style of priority assignment:
58
59 -> 3/4 ... and so forth
60 /
61 -> 1/2 --> 2/4 ... and so forth
62 /
63 / ---> 1/4 ... and so forth
64 / /
65 0 ----> 0 ----> 0 ... and so forth
66
67 Our trick is pretty much just to allocate the possible values of an
68 unsigned long in binary-search fashion.
69
70 XXX: For a 64-bit unsigned long type, this allows a chain of FORK
71 opcodes around 64 units long (without intervening CHAR match
72 insns), at which point things start to get funky. Be sure to keep
73 an eye on whether this turns out to be enough in practice.
74
75 TODOXXX: May want to test how this plays out in 32-bit architectures. */
76 typedef std::pair<unsigned long long, unsigned> arc_priority;
77 #define MAKE_START_PRIORITY make_pair(0,0)
78 arc_priority refine_higher(const arc_priority& a);
79 arc_priority refine_lower(const arc_priority& a);
80 int arc_compare(const arc_priority& a, const arc_priority& b);
81
82 std::ostream& operator << (std::ostream &o, const arc_priority& p);
83
84 /* When constructing tagged DFA sets from ins, we need to keep track
85 of a set of instructions together with further bookkeeping
86 information (relative preference/priority, map items affected). */
87 struct kernel_point {
88 ins *i;
89 arc_priority priority; // -- used in tagged e-closure computation
90 std::list<map_item> map_items;
91 std::set<ins *> parents; // -- used for infinite-loop-detection
92 void print (std::ostream &o, ins *base) const;
93 };
94 typedef std::list<kernel_point> state_kernel; // TODO: does it make sense to have duplicate ins inside a state-kernel?
95
96 /* Corresponds to a tagged-DFA transition arc, complete with
97 subexpression map reordering and such. */
98 struct span {
99 rchar lb, ub; // -- segment [lb, ub]
100 state *to;
101 tdfa_action action;
102 state_kernel *jump_pairs; // -- kernel_points that jump to this span
103 state_kernel *reach_pairs; // -- starting point for te_closure computation
104
105 void emit_jump (translator_output *o, const dfa *d) const;
106 void emit_final (translator_output *o, const dfa *d) const;
107 };
108
109 struct state {
110 dfa *owner; // -- dfa state was made for (XXX may not contain state yet)
111 unsigned label; // -- index of state in dfa
112 state *next; // -- store dfa states as a linked list
113 state_kernel *kernel; // -- set of corresponding ins coordinates
114 /* NB: our usage of the term 'kernel' differs from re2c's slightly
115 -- there is no real need to distinguish NFA edges inside the
116 state from outgoing edges, (XXX) as far as I am aware. */
117
118 bool accepts; // -- is this a final state?
119 unsigned accept_outcome;
120 kernel_point *accept_kp;
121 tdfa_action finalizer; // -- run after accepting
122
123 std::list<span> spans;
124
125 state (dfa *dfa, state_kernel *kernel);
126
127 void emit (translator_output *o, const dfa *d) const;
128
129 void print (translator_output *o) const;
130 void print (std::ostream& o) const;
131 };
132
133 std::ostream& operator << (std::ostream &o, const state* s);
134
135 // ------------------------------------------------------------------------
136
137 struct dfa {
138 ins *orig_nfa;
139 state *first, *last; // -- store dfa states as a linked list
140 unsigned nstates;
141
142 // Infrastructure to deal with tagging:
143 unsigned nmapitems;
144 unsigned ntags;
145 tdfa_action initializer; // -- run before entering start state
146 std::vector<std::string> outcome_snippets;
147
148 // When using tagged DFAs, record indices of the success and failure outcome:
149 int success_outcome;
150 int fail_outcome;
151
152 dfa (ins *i, int ntags, std::vector<std::string>& outcome_snippets,
153 int accept_outcome = 1);
154 ~dfa ();
155
156 void emit (translator_output *o) const;
157
158 void emit_action (translator_output *o, const tdfa_action &act) const;
159 void emit_tagsave (translator_output *o, std::string tag_states,
160 std::string tag_vals, std::string tag_count) const;
161
162 void print (translator_output *o) const;
163 void print (std::ostream& o) const;
164
165 private:
166 void add_map_item (const map_item &m);
167 state *add_state (state* s);
168 state *find_equivalent (state *s, tdfa_action &r);
169 tdfa_action compute_action (state_kernel *old_k, state_kernel *new_k);
170 tdfa_action compute_finalizer (state *s);
171 };
172
173 std::ostream& operator << (std::ostream &o, const dfa& d);
174 std::ostream& operator << (std::ostream &o, const dfa* d);
175
176 /* Produces a dfa that runs the specified code snippets based on match
177 or fail outcomes for an unanchored (by default) match of re. */
178 dfa *stapregex_compile (regexp *re, const std::string& match_snippet, const std::string& fail_snippet);
179
180 };
181
182 #endif
183
184 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.04375 seconds and 5 git commands to generate.