]>
Commit | Line | Data |
---|---|---|
07777235 SM |
1 | // -*- C++ -*- |
2 | // Copyright (C) 2012-2013 Red Hat Inc. | |
3 | // | |
4 | // This file is part of systemtap, and is free software. You can | |
5 | // redistribute it and/or modify it under the terms of the GNU General | |
6 | // Public License (GPL); either version 2, or (at your option) any | |
7 | // later version. | |
8 | // | |
9 | // --- | |
10 | // | |
11 | // This file incorporates code from the re2c project; please see | |
12 | // the file README.stapregex for details. | |
13 | ||
14 | #include <string> | |
15 | #include <iostream> | |
54f92ee4 | 16 | #include <sstream> |
07777235 SM |
17 | #include <set> |
18 | #include <list> | |
19 | #include <map> | |
20 | #include <vector> | |
21 | #include <queue> | |
22 | #include <utility> | |
23 | ||
24 | #include "translator-output.h" | |
25 | ||
26 | #include "stapregex-parse.h" | |
27 | #include "stapregex-tree.h" | |
28 | #include "stapregex-dfa.h" | |
29 | ||
54f92ee4 SM |
30 | // Uncomment to show result of ins (NFA) compilation: |
31 | //#define STAPREGEX_DEBUG_INS | |
32 | // Uncomment to display result of DFA compilation in a compact format: | |
33 | //#define STAPREGEX_DEBUG_DFA | |
112e9e2b SM |
34 | // Uncomment to have the generated engine do a trace of visited states: |
35 | //#define STAPREGEX_DEBUG_MATCH | |
54f92ee4 | 36 | |
07777235 SM |
37 | using namespace std; |
38 | ||
39 | namespace stapregex { | |
40 | ||
41 | regexp *pad_re = NULL; | |
42 | regexp *fail_re = NULL; | |
43 | ||
44 | dfa * | |
45 | stapregex_compile (regexp *re, const std::string& match_snippet, | |
46 | const std::string& fail_snippet) | |
47 | { | |
48 | if (pad_re == NULL) { | |
49 | // build regexp for ".*" | |
50 | pad_re = make_dot (); | |
248f3856 SM |
51 | pad_re = new close_op (pad_re, true); // -- prefer shorter match |
52 | pad_re = new alt_op (pad_re, new null_op, true); // -- prefer second match | |
07777235 SM |
53 | } |
54 | if (fail_re == NULL) { | |
9f232afb | 55 | // build regexp for ".*$", but allow '\0' and support fail outcome |
9b7e0f3d | 56 | fail_re = make_dot (true); // -- allow '\0' |
248f3856 SM |
57 | fail_re = new close_op (fail_re, true); // -- prefer shorter match |
58 | fail_re = new alt_op (fail_re, new null_op, true); // -- prefer second match | |
9f232afb | 59 | fail_re = new cat_op (fail_re, new anchor_op('$')); |
248f3856 | 60 | fail_re = new rule_op(fail_re, 0); |
9f232afb SM |
61 | // XXX: this approach creates one extra spurious-but-safe state |
62 | // (safe because the matching procedure stops after encountering '\0') | |
07777235 SM |
63 | } |
64 | ||
65 | vector<string> outcomes(2); | |
66 | outcomes[0] = fail_snippet; | |
67 | outcomes[1] = match_snippet; | |
68 | ||
69 | int num_tags = re->num_tags; | |
70 | ||
71 | // Pad & wrap re in appropriate rule_ops to control match behaviour: | |
8d2eb09e SM |
72 | bool anchored = re->anchored (); |
73 | if (!anchored) re = new cat_op(pad_re, re); // -- left-padding | |
07777235 SM |
74 | re = new rule_op(re, 1); |
75 | re = new alt_op(re, fail_re); | |
76 | ||
248f3856 SM |
77 | #ifdef STAPREGEX_DEBUG_INS |
78 | cerr << "RESULTING INS FROM REGEX " << re << ":" << endl; | |
79 | #endif | |
80 | ||
07777235 | 81 | ins *i = re->compile(); |
248f3856 SM |
82 | |
83 | #ifdef STAPREGEX_DEBUG_INS | |
84 | for (const ins *j = i; (j - i) < re->ins_size() + 1; ) | |
85 | { | |
86 | j = show_ins(cerr, j, i); cerr << endl; | |
87 | } | |
88 | cerr << endl; | |
89 | #endif | |
90 | ||
91 | // TODOXXX optimize ins as in re2c | |
92 | ||
07777235 SM |
93 | dfa *d = new dfa(i, num_tags, outcomes); |
94 | ||
8d2eb09e SM |
95 | // Carefully deallocate temporary scaffolding: |
96 | if (!anchored) delete ((rule_op*) ((alt_op*) re)->a)->re; // -- new cat_op | |
07777235 SM |
97 | delete ((alt_op*) re)->a; // -- new rule_op |
98 | delete re; // -- new alt_op | |
99 | // NB: deleting a regular expression DOES NOT deallocate its | |
100 | // children. The original re parameter is presumed to be retained | |
101 | // indefinitely as part of a stapdfa table, or such.... | |
102 | ||
103 | return d; | |
104 | } | |
105 | ||
106 | // ------------------------------------------------------------------------ | |
107 | ||
108 | /* Now follows the heart of the tagged-DFA algorithm. This is a basic | |
109 | implementation of the algorithm described in Ville Laurikari's | |
110 | Masters thesis and summarized in the paper "NFAs with Tagged | |
111 | Transitions, their Conversion to Deterministic Automata and | |
112 | Application to Regular Expressions" | |
113 | (http://laurikari.net/ville/spire2000-tnfa.pdf). | |
114 | ||
112e9e2b SM |
115 | TODOXXX: The following does not contain a fully working |
116 | implementation of the tagging support, but only of the regex | |
117 | matching. | |
118 | ||
07777235 SM |
119 | HERE BE DRAGONS (and not the friendly kind) */ |
120 | ||
07777235 SM |
121 | /* Functions to deal with relative transition priorities: */ |
122 | ||
123 | arc_priority | |
124 | refine_higher(const arc_priority& a) | |
125 | { | |
126 | return make_pair(2 * a.first + 1, a.second + 1); | |
127 | } | |
128 | ||
129 | arc_priority | |
130 | refine_lower (const arc_priority& a) | |
131 | { | |
132 | return make_pair(2 * a.first, a.second + 1); | |
133 | } | |
134 | ||
135 | int | |
136 | arc_compare (const arc_priority& a, const arc_priority& b) | |
137 | { | |
138 | unsigned long x = a.first; | |
139 | unsigned long y = b.first; | |
140 | ||
141 | if (a.second > b.second) | |
142 | x = x << (a.second - b.second); | |
143 | else if (a.second < b.second) | |
144 | y = y << (b.second - a.second); | |
145 | ||
146 | return ( x == y ? 0 : x < y ? -1 : 1 ); | |
147 | } | |
148 | ||
149 | /* Manage the linked list of states in a DFA: */ | |
150 | ||
151 | state::state (state_kernel *kernel) | |
98e18a65 | 152 | : label(~0), next(NULL), kernel(kernel), accepts(false), accept_outcome(0) {} |
07777235 SM |
153 | |
154 | state * | |
155 | dfa::add_state (state *s) | |
156 | { | |
157 | s->label = nstates++; | |
158 | ||
159 | if (last == NULL) | |
160 | { | |
161 | last = s; | |
162 | first = last; | |
163 | } | |
164 | else | |
165 | { | |
166 | // append to the end | |
167 | last->next = s; | |
168 | last = last->next; | |
169 | } | |
170 | ||
171 | return last; | |
172 | } | |
173 | ||
174 | /* Operations to build a simple kernel prior to taking closure: */ | |
175 | ||
9b7e0f3d SM |
176 | void |
177 | add_kernel (state_kernel *kernel, ins *i) | |
07777235 SM |
178 | { |
179 | kernel_point point; | |
180 | point.i = i; | |
181 | point.priority = make_pair(0,0); | |
182 | // NB: point->map_items is empty | |
183 | ||
184 | kernel->push_back(point); | |
185 | } | |
186 | ||
187 | state_kernel * | |
188 | make_kernel (ins *i) | |
189 | { | |
190 | state_kernel *kernel = new state_kernel; | |
191 | add_kernel (kernel, i); | |
192 | return kernel; | |
193 | } | |
194 | ||
195 | /* Compute the set of kernel_points that are 'tag-wise unambiguously | |
196 | reachable' from a given initial set of points. Absent tagging, this | |
197 | becomes a bog-standard NFA e_closure construction. */ | |
198 | state_kernel * | |
199 | te_closure (state_kernel *start, int ntags, bool is_initial = false) | |
200 | { | |
201 | state_kernel *closure = new state_kernel(*start); | |
202 | queue<kernel_point> worklist; | |
203 | ||
204 | /* To avoid searching through closure incessantly when retrieving | |
205 | information about existing elements, the following caches are | |
206 | needed: */ | |
207 | vector<unsigned> max_tags (ntags, 0); | |
208 | map<ins *, list<kernel_point> > closure_map; | |
209 | ||
210 | /* Reset priorities and cache initial elements of closure: */ | |
211 | for (state_kernel::iterator it = closure->begin(); | |
212 | it != closure->end(); it++) | |
213 | { | |
214 | it->priority = make_pair(0,0); | |
215 | worklist.push(*it); | |
216 | ||
217 | // Store the element in relevant caches: | |
218 | ||
219 | for (list<map_item>::const_iterator jt = it->map_items.begin(); | |
220 | jt != it->map_items.end(); jt++) | |
221 | max_tags[jt->first] = max(jt->second, max_tags[jt->first]); | |
222 | ||
223 | closure_map[it->i].push_back(*it); | |
224 | } | |
225 | ||
226 | while (!worklist.empty()) | |
227 | { | |
228 | kernel_point point = worklist.front(); worklist.pop(); | |
229 | ||
230 | // Identify e-transitions depending on the opcode. | |
231 | // There are at most two e-transitions emerging from an insn. | |
232 | // If we have two e-transitions, the 'other' has higher priority. | |
233 | ||
234 | ins *target = NULL; int tag = -1; | |
235 | ins *other_target = NULL; int other_tag = -1; | |
236 | ||
9b7e0f3d SM |
237 | // TODOXXX line-by-line proceeds below |
238 | ||
07777235 SM |
239 | bool do_split = false; |
240 | ||
241 | if (point.i->i.tag == TAG) | |
242 | { | |
243 | target = &point.i[1]; | |
244 | tag = (int) point.i->i.param; | |
245 | } | |
179688ce SM |
246 | else if (point.i->i.tag == FORK && point.i == (ins *) point.i->i.link) |
247 | { | |
248 | /* Workaround for a FORK that points to itself: */ | |
249 | target = &point.i[1]; | |
250 | } | |
07777235 SM |
251 | else if (point.i->i.tag == FORK) |
252 | { | |
253 | do_split = true; | |
254 | // Relative priority of two e-transitions depends on param: | |
255 | if (point.i->i.param) | |
256 | { | |
257 | // Prefer jumping to link. | |
258 | target = &point.i[1]; | |
259 | other_target = (ins *) point.i->i.link; | |
260 | } | |
261 | else | |
262 | { | |
263 | // Prefer stepping to next instruction. | |
264 | target = (ins *) point.i->i.link; | |
265 | other_target = &point.i[1]; | |
266 | } | |
267 | } | |
268 | else if (point.i->i.tag == GOTO) | |
269 | { | |
270 | target = (ins *) point.i->i.link; | |
271 | } | |
272 | else if (point.i->i.tag == INIT && is_initial) | |
273 | { | |
274 | target = &point.i[1]; | |
275 | } | |
276 | ||
179688ce SM |
277 | bool already_found; |
278 | ||
07777235 SM |
279 | // Data for the endpoint of the first transition: |
280 | kernel_point next; | |
281 | next.i = target; | |
282 | next.priority = do_split ? refine_lower(point.priority) : point.priority; | |
283 | next.map_items = point.map_items; | |
284 | ||
9b7e0f3d SM |
285 | // Date for the endpoint of the second transition: |
286 | kernel_point other_next; | |
287 | other_next.i = other_target; | |
288 | other_next.priority = do_split ? refine_higher(point.priority) : point.priority; | |
289 | other_next.map_items = point.map_items; | |
290 | ||
179688ce SM |
291 | // Do infinite-loop-check: |
292 | other_next.parents = point.parents; | |
293 | if (point.parents.find(other_next.i) != point.parents.end()) | |
294 | { | |
295 | other_target = NULL; | |
296 | other_tag = -1; | |
297 | } | |
298 | other_next.parents.insert(other_next.i); | |
299 | ||
300 | next.parents = point.parents; | |
301 | if (point.parents.find(next.i) != point.parents.end()) | |
302 | { | |
c92d3b42 FCE |
303 | // target = NULL; |
304 | // tag = -1; | |
305 | // ^^^ these are overwritten from other_target / other_tag immediately | |
179688ce SM |
306 | goto next_target; |
307 | } | |
308 | next.parents.insert(next.i); | |
309 | ||
07777235 | 310 | another_transition: |
9b7e0f3d SM |
311 | if (target == NULL) |
312 | continue; | |
07777235 SM |
313 | |
314 | // Deal with the current e-transition: | |
315 | ||
316 | if (tag >= 0) | |
317 | { | |
318 | /* Delete all existing next.map_items of the form m[tag,x]. */ | |
319 | for (list<map_item>::iterator it = next.map_items.begin(); | |
a6be9455 | 320 | it != next.map_items.end(); ) |
07777235 | 321 | if (it->first == (unsigned) tag) |
a6be9455 SM |
322 | { |
323 | list<map_item>::iterator next_it = it; | |
324 | next_it++; | |
325 | next.map_items.erase (it); | |
326 | it = next_it; | |
327 | } | |
328 | else it++; | |
07777235 SM |
329 | |
330 | /* Add m[tag,x] to next.map_items, where x is the smallest | |
331 | nonnegative integer such that m[tag,x] does not occur | |
332 | anywhere in closure. Then update the cache. */ | |
333 | unsigned x = max_tags[tag]; | |
334 | next.map_items.push_back(make_pair(tag, ++x)); | |
335 | max_tags[tag] = x; | |
336 | } | |
337 | ||
179688ce | 338 | already_found = false; |
07777235 SM |
339 | |
340 | /* Deal with similar transitions that have a different priority. */ | |
341 | for (list<kernel_point>::iterator it = closure_map[next.i].begin(); | |
248f3856 | 342 | it != closure_map[next.i].end(); ) |
07777235 SM |
343 | { |
344 | int result = arc_compare(it->priority, next.priority); | |
179688ce | 345 | |
07777235 | 346 | if (result > 0) { |
248f3856 SM |
347 | // obnoxious shuffle to avoid iterator invalidation |
348 | list<kernel_point>::iterator old_it = it; | |
349 | it++; | |
350 | closure_map[next.i].erase(old_it); | |
351 | continue; | |
07777235 SM |
352 | } else if (result == 0) { |
353 | already_found = true; | |
354 | } | |
248f3856 | 355 | it++; |
07777235 SM |
356 | } |
357 | ||
358 | if (!already_found) { | |
07777235 SM |
359 | // Store the element in relevant caches: |
360 | ||
248f3856 SM |
361 | closure_map[next.i].push_back(next); |
362 | ||
07777235 SM |
363 | for (list<map_item>::iterator jt = next.map_items.begin(); |
364 | jt != next.map_items.end(); jt++) | |
365 | max_tags[jt->first] = max(jt->second, max_tags[jt->first]); | |
366 | ||
248f3856 SM |
367 | // Store the element in closure: |
368 | closure->push_back(next); | |
369 | worklist.push(next); | |
07777235 SM |
370 | } |
371 | ||
179688ce | 372 | next_target: |
07777235 | 373 | // Now move to dealing with the second e-transition, if any. |
07777235 SM |
374 | target = other_target; other_target = NULL; |
375 | tag = other_tag; other_tag = -1; | |
9b7e0f3d | 376 | next = other_next; |
07777235 SM |
377 | |
378 | goto another_transition; | |
379 | } | |
380 | ||
381 | return closure; | |
382 | } | |
383 | ||
384 | /* Find the set of reordering commands (if any) that will get us from | |
385 | state s to some existing state in the dfa (returns the state in | |
386 | question, appends reordering commands to r). Returns NULL is no | |
387 | suitable state is found. */ | |
388 | state * | |
389 | dfa::find_equivalent (state *s, tdfa_action &r) | |
390 | { | |
391 | state *answer = NULL; | |
392 | ||
393 | for (state_kernel::iterator it = s->kernel->begin(); | |
394 | it != s->kernel->end(); it++) | |
9b7e0f3d | 395 | mark(it->i); |
8d2eb09e | 396 | |
07777235 SM |
397 | /* Check kernels of existing states for size equivalence and for |
398 | unmarked items (similar to re2c's original algorithm): */ | |
399 | unsigned n = s->kernel->size(); | |
400 | for (state *t = first; t != NULL; t = t->next) | |
401 | { | |
402 | if (t->kernel->size() == n) | |
403 | { | |
404 | for (state_kernel::iterator it = t->kernel->begin(); | |
405 | it != t->kernel->end(); it++) | |
8d2eb09e SM |
406 | if (!marked(it->i)) |
407 | goto next_state; | |
07777235 | 408 | |
8d2eb09e SM |
409 | // TODOXXX check for existence of reordering r |
410 | answer = t; | |
411 | goto cleanup; | |
07777235 | 412 | } |
8d2eb09e SM |
413 | next_state: |
414 | ; | |
07777235 SM |
415 | } |
416 | ||
417 | cleanup: | |
418 | for (state_kernel::iterator it = s->kernel->begin(); | |
419 | it != s->kernel->end(); it++) | |
420 | unmark(it->i); | |
421 | ||
422 | return answer; | |
423 | } | |
424 | ||
425 | ||
426 | dfa::dfa (ins *i, int ntags, vector<string>& outcome_snippets) | |
427 | : orig_nfa(i), nstates(0), ntags(ntags), outcome_snippets(outcome_snippets) | |
428 | { | |
429 | /* Initialize empty linked list of states: */ | |
430 | first = last = NULL; | |
431 | ||
432 | ins *start = &i[0]; | |
fb5669f0 SM |
433 | state_kernel *seed_kernel = make_kernel(start); |
434 | state_kernel *initial_kernel = te_closure(seed_kernel, ntags, true); | |
435 | delete seed_kernel; | |
248f3856 SM |
436 | state *initial = add_state(new state(initial_kernel)); |
437 | queue<state *> worklist; worklist.push(initial); | |
07777235 SM |
438 | |
439 | while (!worklist.empty()) | |
440 | { | |
248f3856 | 441 | state *curr = worklist.front(); worklist.pop(); |
07777235 SM |
442 | |
443 | vector<list<ins *> > edges(NUM_REAL_CHARS); | |
444 | ||
445 | /* Using the CHAR instructions in kernel, build the initial | |
446 | table of spans for curr. Also check for final states. */ | |
447 | ||
248f3856 SM |
448 | for (list<kernel_point>::iterator it = curr->kernel->begin(); |
449 | it != curr->kernel->end(); it++) | |
07777235 SM |
450 | { |
451 | if (it->i->i.tag == CHAR) | |
452 | { | |
453 | for (ins *j = &it->i[1]; j < (ins *) it->i->i.link; j++) | |
454 | edges[j->c.value].push_back((ins *) it->i->i.link); | |
455 | } | |
456 | else if (it->i->i.tag == ACCEPT) | |
457 | { | |
458 | /* Always prefer the highest numbered outcome: */ | |
459 | curr->accepts = true; | |
460 | curr->accept_outcome = max(it->i->i.param, curr->accept_outcome); | |
461 | } | |
462 | } | |
463 | ||
aaed700d | 464 | for (unsigned c = 0; c < NUM_REAL_CHARS; ) |
07777235 SM |
465 | { |
466 | list <ins *> e = edges[c]; | |
467 | assert (!e.empty()); // XXX: ensured by fail_re in stapregex_compile | |
468 | ||
469 | span s; | |
470 | ||
471 | s.lb = c; | |
472 | ||
473 | while (++c < NUM_REAL_CHARS && edges[c] == e) ; | |
474 | ||
248f3856 | 475 | s.ub = c - 1; |
07777235 SM |
476 | |
477 | s.reach_pairs = new state_kernel; | |
478 | ||
479 | for (list<ins *>::iterator it = e.begin(); | |
480 | it != e.end(); it++) | |
481 | add_kernel (s.reach_pairs, *it); | |
482 | ||
483 | curr->spans.push_back(s); | |
484 | } | |
485 | ||
486 | /* For each of the spans in curr, determine the reachable | |
487 | points assuming a character in the span. */ | |
488 | for (list<span>::iterator it = curr->spans.begin(); | |
489 | it != curr->spans.end(); it++) | |
490 | { | |
491 | state_kernel *reach_pairs = it->reach_pairs; | |
492 | ||
493 | /* Set up candidate target state: */ | |
494 | state_kernel *u_pairs = te_closure(reach_pairs, ntags); | |
495 | state *target = new state(u_pairs); | |
496 | tdfa_action c; | |
497 | ||
498 | /* Generate position-save commands for any map items | |
248f3856 | 499 | that do not appear in curr->kernel: */ |
07777235 SM |
500 | |
501 | set<map_item> all_items; | |
248f3856 SM |
502 | for (state_kernel::const_iterator jt = curr->kernel->begin(); |
503 | jt != curr->kernel->end(); jt++) | |
07777235 SM |
504 | for (list<map_item>::const_iterator kt = jt->map_items.begin(); |
505 | kt != jt->map_items.end(); jt++) | |
506 | all_items.insert(*kt); | |
507 | ||
508 | list<map_item> store_items; | |
509 | for (state_kernel::const_iterator jt = u_pairs->begin(); | |
510 | jt != u_pairs->end(); jt++) | |
511 | for (list<map_item>::const_iterator kt = jt->map_items.begin(); | |
512 | kt != jt->map_items.end(); kt++) | |
513 | if (all_items.find(*kt) == all_items.end()) | |
514 | store_items.push_back(*kt); | |
515 | ||
516 | for (list<map_item>::iterator jt = store_items.begin(); | |
517 | jt != store_items.end(); jt++) | |
518 | { | |
519 | // append m[i,n] <- <curr position> to c | |
520 | tdfa_insn insn; | |
521 | insn.to = *jt; | |
522 | insn.save_pos = true; | |
523 | c.push_back(insn); | |
524 | } | |
525 | ||
526 | /* If there is a state t_prime in states such that some | |
527 | sequence of reordering commands r produces t_prime | |
528 | from target, use t_prime as the target state, | |
529 | appending the reordering commands to c. */ | |
530 | state *t_prime = find_equivalent(target, c); | |
9b7e0f3d | 531 | if (t_prime != NULL) |
248f3856 | 532 | { |
07777235 SM |
533 | delete target; |
534 | } | |
535 | else | |
536 | { | |
537 | /* We need to actually add target to the dfa: */ | |
538 | t_prime = target; | |
539 | add_state(t_prime); | |
248f3856 | 540 | worklist.push(t_prime); |
07777235 SM |
541 | |
542 | if (t_prime->accepts) | |
543 | { | |
544 | // TODOXXX set the finisher of t_prime | |
545 | } | |
546 | } | |
547 | ||
548 | /* Set the transition: */ | |
549 | it->to = t_prime; | |
550 | it->action = c; | |
551 | } | |
552 | } | |
553 | } | |
554 | ||
555 | dfa::~dfa () | |
556 | { | |
557 | state * s; | |
558 | while ((s = first)) | |
559 | { | |
560 | first = s->next; | |
561 | delete s; | |
562 | } | |
563 | ||
564 | delete orig_nfa; | |
565 | } | |
566 | ||
567 | // ------------------------------------------------------------------------ | |
568 | ||
54f92ee4 SM |
569 | // TODOXXX add emission instructions for tag_ops |
570 | ||
571 | void | |
572 | span::emit_jump (translator_output *o, const dfa *d) const | |
573 | { | |
112e9e2b | 574 | #ifdef STAPREGEX_DEBUG_MATCH |
54f92ee4 | 575 | o->newline () << "printf(\" --> GOTO yystate%d\\n\", " << to->label << ");"; |
112e9e2b | 576 | #endif |
54f92ee4 SM |
577 | |
578 | // TODOXXX tags feature allows proper longest-match priority | |
579 | if (to->accepts) | |
580 | { | |
581 | emit_final(o, d); | |
582 | } | |
583 | else | |
584 | { | |
585 | o->newline () << "YYCURSOR++;"; | |
586 | o->newline () << "goto yystate" << to->label << ";"; | |
587 | } | |
588 | } | |
589 | ||
590 | /* Assuming the target DFA of the span is a final state, emit code to | |
591 | (TODOXXX) cleanup tags and exit with a final answer. */ | |
592 | void | |
593 | span::emit_final (translator_output *o, const dfa *d) const | |
594 | { | |
595 | assert (to->accepts); // XXX: must guarantee correct usage of emit_final() | |
596 | o->newline() << d->outcome_snippets[to->accept_outcome]; | |
597 | o->newline() << "goto yyfinish;"; | |
598 | } | |
599 | ||
600 | string c_char(char c) | |
601 | { | |
602 | stringstream o; | |
603 | o << "'"; | |
604 | print_escaped(o, c); | |
605 | o << "'"; | |
606 | return o.str(); | |
607 | } | |
608 | ||
609 | void | |
610 | state::emit (translator_output *o, const dfa *d) const | |
611 | { | |
612 | o->newline() << "yystate" << label << ": "; | |
112e9e2b | 613 | #ifdef STAPREGEX_DEBUG_MATCH |
54f92ee4 | 614 | o->newline () << "printf(\"READ '%s' %c\", cur, *YYCURSOR);"; |
112e9e2b | 615 | #endif |
54f92ee4 SM |
616 | o->newline() << "switch (*YYCURSOR) {"; |
617 | o->indent(1); | |
618 | for (list<span>::const_iterator it = spans.begin(); | |
619 | it != spans.end(); it++) | |
620 | { | |
621 | // If we see a '\0', go immediately into an accept state: | |
622 | if (it->lb == '\0') | |
623 | { | |
624 | o->newline() << "case " << c_char('\0') << ":"; | |
625 | it->emit_final(o, d); // TODOXXX extra function may be unneeded | |
626 | } | |
627 | ||
628 | // Emit labels to handle all the other elements of the span: | |
112e9e2b | 629 | for (unsigned c = max('\1', it->lb); c <= (unsigned) it->ub; c++) { |
54f92ee4 SM |
630 | o->newline() << "case " << c_char((char) c) << ":"; |
631 | } | |
632 | it->emit_jump(o, d); | |
633 | ||
634 | // TODOXXX handle a 'default' set of characters for the largest span... | |
635 | // TODOXXX optimize by accepting before end of string whenever possible... (also necessary for proper first-matched-substring selection) | |
636 | } | |
637 | o->newline(-1) << "}"; | |
638 | } | |
639 | ||
07777235 SM |
640 | void |
641 | dfa::emit (translator_output *o) const | |
642 | { | |
54f92ee4 | 643 | #ifdef STAPREGEX_DEBUG_DFA |
07777235 | 644 | print(o); |
54f92ee4 SM |
645 | #else |
646 | o->newline() << "{"; | |
647 | o->newline(1); | |
e5f8ae47 SM |
648 | |
649 | // XXX: workaround for empty regex | |
650 | if (first->accepts) | |
651 | { | |
652 | o->newline() << outcome_snippets[first->accept_outcome]; | |
653 | o->newline() << "goto yyfinish;"; | |
654 | } | |
655 | ||
54f92ee4 SM |
656 | for (state *s = first; s; s = s->next) |
657 | s->emit(o, this); | |
e5f8ae47 | 658 | |
54f92ee4 SM |
659 | o->newline() << "yyfinish: ;"; |
660 | o->newline(-1) << "}"; | |
661 | #endif | |
07777235 SM |
662 | } |
663 | ||
664 | void | |
665 | dfa::emit_tagsave (translator_output *o, std::string tag_states, | |
666 | std::string tag_vals, std::string tag_count) const | |
667 | { | |
668 | // TODOXXX implement after testing the preceding algorithms | |
669 | } | |
670 | ||
671 | // ------------------------------------------------------------------------ | |
672 | ||
673 | std::ostream& | |
674 | operator << (std::ostream &o, const tdfa_action& a) | |
675 | { | |
676 | for (list<tdfa_insn>::const_iterator it = a.begin(); | |
677 | it != a.end(); it++) | |
678 | { | |
679 | if (it != a.begin()) o << "; "; | |
680 | ||
681 | o << "m[" << it->to.first << "," << it->to.second << "] <- "; | |
682 | ||
683 | if (it->save_pos) | |
684 | o << "p"; | |
685 | else | |
686 | o << "m[" << it->from.first << "," << it->from.second << "]"; | |
687 | } | |
688 | ||
689 | return o; | |
690 | } | |
691 | ||
248f3856 SM |
692 | std::ostream& |
693 | operator << (std::ostream &o, const arc_priority& p) | |
694 | { | |
695 | o << p.first << "/" << (1 << p.second); | |
696 | return o; | |
697 | } | |
698 | ||
07777235 SM |
699 | void |
700 | state::print (translator_output *o) const | |
701 | { | |
702 | o->line() << "state " << label; | |
703 | if (accepts) | |
704 | o->line() << " accepts " << accept_outcome; | |
705 | if (!finalizer.empty()) | |
706 | o->line() << " [" << finalizer << "]"; | |
707 | ||
708 | o->indent(1); | |
709 | for (list<span>::const_iterator it = spans.begin(); | |
710 | it != spans.end(); it++) | |
711 | { | |
248f3856 | 712 | o->newline() << "'"; |
07777235 | 713 | if (it->lb == it->ub) |
248f3856 SM |
714 | { |
715 | print_escaped (o->line(), it->lb); | |
716 | o->line() << " "; | |
717 | } | |
07777235 | 718 | else |
248f3856 SM |
719 | { |
720 | print_escaped (o->line(), it->lb); | |
721 | o->line() << "-"; | |
722 | print_escaped (o->line(), it->ub); | |
723 | } | |
07777235 | 724 | |
248f3856 | 725 | o->line() << "' -> " << it->to->label; |
07777235 SM |
726 | |
727 | if (!it->action.empty()) | |
728 | o->line() << " [" << it->action << "]"; | |
729 | } | |
730 | o->newline(-1); | |
731 | } | |
732 | ||
733 | void | |
734 | dfa::print (std::ostream& o) const | |
735 | { | |
736 | translator_output to(o); print(&to); | |
737 | } | |
738 | ||
739 | void | |
740 | dfa::print (translator_output *o) const | |
741 | { | |
248f3856 | 742 | o->newline(); |
07777235 SM |
743 | for (state *s = first; s; s = s->next) |
744 | { | |
745 | s->print(o); | |
746 | o->newline(); | |
747 | } | |
248f3856 | 748 | o->newline(); |
07777235 SM |
749 | } |
750 | ||
751 | std::ostream& | |
752 | operator << (std::ostream& o, const dfa& d) | |
753 | { | |
754 | d.print(o); | |
755 | return o; | |
756 | } | |
757 | ||
758 | std::ostream& | |
759 | operator << (std::ostream &o, const dfa *d) | |
760 | { | |
761 | o << *d; | |
762 | return o; | |
763 | } | |
764 | ||
765 | }; | |
766 | ||
767 | /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ |