]>
Commit | Line | Data |
---|---|---|
07777235 SM |
1 | // -*- C++ -*- |
2 | // Copyright (C) 2012-2013 Red Hat Inc. | |
3 | // | |
4 | // This file is part of systemtap, and is free software. You can | |
5 | // redistribute it and/or modify it under the terms of the GNU General | |
6 | // Public License (GPL); either version 2, or (at your option) any | |
7 | // later version. | |
8 | // | |
9 | // --- | |
10 | // | |
11 | // This file incorporates code from the re2c project; please see | |
12 | // the file README.stapregex for details. | |
13 | ||
14 | #include <string> | |
15 | #include <iostream> | |
54f92ee4 | 16 | #include <sstream> |
07777235 SM |
17 | #include <set> |
18 | #include <list> | |
19 | #include <map> | |
20 | #include <vector> | |
21 | #include <queue> | |
22 | #include <utility> | |
23 | ||
24 | #include "translator-output.h" | |
25 | ||
26 | #include "stapregex-parse.h" | |
27 | #include "stapregex-tree.h" | |
28 | #include "stapregex-dfa.h" | |
29 | ||
54f92ee4 SM |
30 | // Uncomment to show result of ins (NFA) compilation: |
31 | //#define STAPREGEX_DEBUG_INS | |
32 | // Uncomment to display result of DFA compilation in a compact format: | |
33 | //#define STAPREGEX_DEBUG_DFA | |
34 | ||
07777235 SM |
35 | using namespace std; |
36 | ||
37 | namespace stapregex { | |
38 | ||
39 | regexp *pad_re = NULL; | |
40 | regexp *fail_re = NULL; | |
41 | ||
42 | dfa * | |
43 | stapregex_compile (regexp *re, const std::string& match_snippet, | |
44 | const std::string& fail_snippet) | |
45 | { | |
46 | if (pad_re == NULL) { | |
47 | // build regexp for ".*" | |
48 | pad_re = make_dot (); | |
248f3856 SM |
49 | pad_re = new close_op (pad_re, true); // -- prefer shorter match |
50 | pad_re = new alt_op (pad_re, new null_op, true); // -- prefer second match | |
07777235 SM |
51 | } |
52 | if (fail_re == NULL) { | |
9f232afb | 53 | // build regexp for ".*$", but allow '\0' and support fail outcome |
9b7e0f3d | 54 | fail_re = make_dot (true); // -- allow '\0' |
248f3856 SM |
55 | fail_re = new close_op (fail_re, true); // -- prefer shorter match |
56 | fail_re = new alt_op (fail_re, new null_op, true); // -- prefer second match | |
9f232afb | 57 | fail_re = new cat_op (fail_re, new anchor_op('$')); |
248f3856 | 58 | fail_re = new rule_op(fail_re, 0); |
9f232afb SM |
59 | // XXX: this approach creates one extra spurious-but-safe state |
60 | // (safe because the matching procedure stops after encountering '\0') | |
07777235 SM |
61 | } |
62 | ||
63 | vector<string> outcomes(2); | |
64 | outcomes[0] = fail_snippet; | |
65 | outcomes[1] = match_snippet; | |
66 | ||
67 | int num_tags = re->num_tags; | |
68 | ||
69 | // Pad & wrap re in appropriate rule_ops to control match behaviour: | |
8d2eb09e SM |
70 | bool anchored = re->anchored (); |
71 | if (!anchored) re = new cat_op(pad_re, re); // -- left-padding | |
07777235 SM |
72 | re = new rule_op(re, 1); |
73 | re = new alt_op(re, fail_re); | |
74 | ||
248f3856 SM |
75 | #ifdef STAPREGEX_DEBUG_INS |
76 | cerr << "RESULTING INS FROM REGEX " << re << ":" << endl; | |
77 | #endif | |
78 | ||
07777235 | 79 | ins *i = re->compile(); |
248f3856 SM |
80 | |
81 | #ifdef STAPREGEX_DEBUG_INS | |
82 | for (const ins *j = i; (j - i) < re->ins_size() + 1; ) | |
83 | { | |
84 | j = show_ins(cerr, j, i); cerr << endl; | |
85 | } | |
86 | cerr << endl; | |
87 | #endif | |
88 | ||
89 | // TODOXXX optimize ins as in re2c | |
90 | ||
07777235 SM |
91 | dfa *d = new dfa(i, num_tags, outcomes); |
92 | ||
8d2eb09e SM |
93 | // Carefully deallocate temporary scaffolding: |
94 | if (!anchored) delete ((rule_op*) ((alt_op*) re)->a)->re; // -- new cat_op | |
07777235 SM |
95 | delete ((alt_op*) re)->a; // -- new rule_op |
96 | delete re; // -- new alt_op | |
97 | // NB: deleting a regular expression DOES NOT deallocate its | |
98 | // children. The original re parameter is presumed to be retained | |
99 | // indefinitely as part of a stapdfa table, or such.... | |
100 | ||
101 | return d; | |
102 | } | |
103 | ||
104 | // ------------------------------------------------------------------------ | |
105 | ||
106 | /* Now follows the heart of the tagged-DFA algorithm. This is a basic | |
107 | implementation of the algorithm described in Ville Laurikari's | |
108 | Masters thesis and summarized in the paper "NFAs with Tagged | |
109 | Transitions, their Conversion to Deterministic Automata and | |
110 | Application to Regular Expressions" | |
111 | (http://laurikari.net/ville/spire2000-tnfa.pdf). | |
112 | ||
113 | HERE BE DRAGONS (and not the friendly kind) */ | |
114 | ||
07777235 SM |
115 | /* Functions to deal with relative transition priorities: */ |
116 | ||
117 | arc_priority | |
118 | refine_higher(const arc_priority& a) | |
119 | { | |
120 | return make_pair(2 * a.first + 1, a.second + 1); | |
121 | } | |
122 | ||
123 | arc_priority | |
124 | refine_lower (const arc_priority& a) | |
125 | { | |
126 | return make_pair(2 * a.first, a.second + 1); | |
127 | } | |
128 | ||
129 | int | |
130 | arc_compare (const arc_priority& a, const arc_priority& b) | |
131 | { | |
132 | unsigned long x = a.first; | |
133 | unsigned long y = b.first; | |
134 | ||
135 | if (a.second > b.second) | |
136 | x = x << (a.second - b.second); | |
137 | else if (a.second < b.second) | |
138 | y = y << (b.second - a.second); | |
139 | ||
140 | return ( x == y ? 0 : x < y ? -1 : 1 ); | |
141 | } | |
142 | ||
143 | /* Manage the linked list of states in a DFA: */ | |
144 | ||
145 | state::state (state_kernel *kernel) | |
146 | : next(NULL), kernel(kernel), accepts(false), accept_outcome(0) {} | |
147 | ||
148 | state * | |
149 | dfa::add_state (state *s) | |
150 | { | |
151 | s->label = nstates++; | |
152 | ||
153 | if (last == NULL) | |
154 | { | |
155 | last = s; | |
156 | first = last; | |
157 | } | |
158 | else | |
159 | { | |
160 | // append to the end | |
161 | last->next = s; | |
162 | last = last->next; | |
163 | } | |
164 | ||
165 | return last; | |
166 | } | |
167 | ||
168 | /* Operations to build a simple kernel prior to taking closure: */ | |
169 | ||
9b7e0f3d SM |
170 | void |
171 | add_kernel (state_kernel *kernel, ins *i) | |
07777235 SM |
172 | { |
173 | kernel_point point; | |
174 | point.i = i; | |
175 | point.priority = make_pair(0,0); | |
176 | // NB: point->map_items is empty | |
177 | ||
178 | kernel->push_back(point); | |
179 | } | |
180 | ||
181 | state_kernel * | |
182 | make_kernel (ins *i) | |
183 | { | |
184 | state_kernel *kernel = new state_kernel; | |
185 | add_kernel (kernel, i); | |
186 | return kernel; | |
187 | } | |
188 | ||
189 | /* Compute the set of kernel_points that are 'tag-wise unambiguously | |
190 | reachable' from a given initial set of points. Absent tagging, this | |
191 | becomes a bog-standard NFA e_closure construction. */ | |
192 | state_kernel * | |
193 | te_closure (state_kernel *start, int ntags, bool is_initial = false) | |
194 | { | |
195 | state_kernel *closure = new state_kernel(*start); | |
196 | queue<kernel_point> worklist; | |
197 | ||
198 | /* To avoid searching through closure incessantly when retrieving | |
199 | information about existing elements, the following caches are | |
200 | needed: */ | |
201 | vector<unsigned> max_tags (ntags, 0); | |
202 | map<ins *, list<kernel_point> > closure_map; | |
203 | ||
204 | /* Reset priorities and cache initial elements of closure: */ | |
205 | for (state_kernel::iterator it = closure->begin(); | |
206 | it != closure->end(); it++) | |
207 | { | |
208 | it->priority = make_pair(0,0); | |
209 | worklist.push(*it); | |
210 | ||
211 | // Store the element in relevant caches: | |
212 | ||
213 | for (list<map_item>::const_iterator jt = it->map_items.begin(); | |
214 | jt != it->map_items.end(); jt++) | |
215 | max_tags[jt->first] = max(jt->second, max_tags[jt->first]); | |
216 | ||
217 | closure_map[it->i].push_back(*it); | |
218 | } | |
219 | ||
220 | while (!worklist.empty()) | |
221 | { | |
222 | kernel_point point = worklist.front(); worklist.pop(); | |
223 | ||
224 | // Identify e-transitions depending on the opcode. | |
225 | // There are at most two e-transitions emerging from an insn. | |
226 | // If we have two e-transitions, the 'other' has higher priority. | |
227 | ||
228 | ins *target = NULL; int tag = -1; | |
229 | ins *other_target = NULL; int other_tag = -1; | |
230 | ||
9b7e0f3d SM |
231 | // TODOXXX line-by-line proceeds below |
232 | ||
07777235 SM |
233 | bool do_split = false; |
234 | ||
235 | if (point.i->i.tag == TAG) | |
236 | { | |
237 | target = &point.i[1]; | |
238 | tag = (int) point.i->i.param; | |
239 | } | |
179688ce SM |
240 | else if (point.i->i.tag == FORK && point.i == (ins *) point.i->i.link) |
241 | { | |
242 | /* Workaround for a FORK that points to itself: */ | |
243 | target = &point.i[1]; | |
244 | } | |
07777235 SM |
245 | else if (point.i->i.tag == FORK) |
246 | { | |
247 | do_split = true; | |
248 | // Relative priority of two e-transitions depends on param: | |
249 | if (point.i->i.param) | |
250 | { | |
251 | // Prefer jumping to link. | |
252 | target = &point.i[1]; | |
253 | other_target = (ins *) point.i->i.link; | |
254 | } | |
255 | else | |
256 | { | |
257 | // Prefer stepping to next instruction. | |
258 | target = (ins *) point.i->i.link; | |
259 | other_target = &point.i[1]; | |
260 | } | |
261 | } | |
262 | else if (point.i->i.tag == GOTO) | |
263 | { | |
264 | target = (ins *) point.i->i.link; | |
265 | } | |
266 | else if (point.i->i.tag == INIT && is_initial) | |
267 | { | |
268 | target = &point.i[1]; | |
269 | } | |
270 | ||
179688ce SM |
271 | bool already_found; |
272 | ||
07777235 SM |
273 | // Data for the endpoint of the first transition: |
274 | kernel_point next; | |
275 | next.i = target; | |
276 | next.priority = do_split ? refine_lower(point.priority) : point.priority; | |
277 | next.map_items = point.map_items; | |
278 | ||
9b7e0f3d SM |
279 | // Date for the endpoint of the second transition: |
280 | kernel_point other_next; | |
281 | other_next.i = other_target; | |
282 | other_next.priority = do_split ? refine_higher(point.priority) : point.priority; | |
283 | other_next.map_items = point.map_items; | |
284 | ||
179688ce SM |
285 | // Do infinite-loop-check: |
286 | other_next.parents = point.parents; | |
287 | if (point.parents.find(other_next.i) != point.parents.end()) | |
288 | { | |
289 | other_target = NULL; | |
290 | other_tag = -1; | |
291 | } | |
292 | other_next.parents.insert(other_next.i); | |
293 | ||
294 | next.parents = point.parents; | |
295 | if (point.parents.find(next.i) != point.parents.end()) | |
296 | { | |
297 | target = NULL; | |
298 | tag = -1; | |
299 | goto next_target; | |
300 | } | |
301 | next.parents.insert(next.i); | |
302 | ||
07777235 | 303 | another_transition: |
9b7e0f3d SM |
304 | if (target == NULL) |
305 | continue; | |
07777235 SM |
306 | |
307 | // Deal with the current e-transition: | |
308 | ||
309 | if (tag >= 0) | |
310 | { | |
311 | /* Delete all existing next.map_items of the form m[tag,x]. */ | |
312 | for (list<map_item>::iterator it = next.map_items.begin(); | |
313 | it != next.map_items.end(); it++) | |
314 | if (it->first == (unsigned) tag) | |
315 | next.map_items.erase (it); | |
316 | ||
317 | /* Add m[tag,x] to next.map_items, where x is the smallest | |
318 | nonnegative integer such that m[tag,x] does not occur | |
319 | anywhere in closure. Then update the cache. */ | |
320 | unsigned x = max_tags[tag]; | |
321 | next.map_items.push_back(make_pair(tag, ++x)); | |
322 | max_tags[tag] = x; | |
323 | } | |
324 | ||
179688ce | 325 | already_found = false; |
07777235 SM |
326 | |
327 | /* Deal with similar transitions that have a different priority. */ | |
328 | for (list<kernel_point>::iterator it = closure_map[next.i].begin(); | |
248f3856 | 329 | it != closure_map[next.i].end(); ) |
07777235 SM |
330 | { |
331 | int result = arc_compare(it->priority, next.priority); | |
179688ce | 332 | |
07777235 | 333 | if (result > 0) { |
248f3856 SM |
334 | // obnoxious shuffle to avoid iterator invalidation |
335 | list<kernel_point>::iterator old_it = it; | |
336 | it++; | |
337 | closure_map[next.i].erase(old_it); | |
338 | continue; | |
07777235 SM |
339 | } else if (result == 0) { |
340 | already_found = true; | |
341 | } | |
248f3856 | 342 | it++; |
07777235 SM |
343 | } |
344 | ||
345 | if (!already_found) { | |
07777235 SM |
346 | // Store the element in relevant caches: |
347 | ||
248f3856 SM |
348 | closure_map[next.i].push_back(next); |
349 | ||
07777235 SM |
350 | for (list<map_item>::iterator jt = next.map_items.begin(); |
351 | jt != next.map_items.end(); jt++) | |
352 | max_tags[jt->first] = max(jt->second, max_tags[jt->first]); | |
353 | ||
248f3856 SM |
354 | // Store the element in closure: |
355 | closure->push_back(next); | |
356 | worklist.push(next); | |
07777235 SM |
357 | } |
358 | ||
179688ce | 359 | next_target: |
07777235 | 360 | // Now move to dealing with the second e-transition, if any. |
07777235 SM |
361 | target = other_target; other_target = NULL; |
362 | tag = other_tag; other_tag = -1; | |
9b7e0f3d | 363 | next = other_next; |
07777235 SM |
364 | |
365 | goto another_transition; | |
366 | } | |
367 | ||
368 | return closure; | |
369 | } | |
370 | ||
371 | /* Find the set of reordering commands (if any) that will get us from | |
372 | state s to some existing state in the dfa (returns the state in | |
373 | question, appends reordering commands to r). Returns NULL is no | |
374 | suitable state is found. */ | |
375 | state * | |
376 | dfa::find_equivalent (state *s, tdfa_action &r) | |
377 | { | |
378 | state *answer = NULL; | |
379 | ||
380 | for (state_kernel::iterator it = s->kernel->begin(); | |
381 | it != s->kernel->end(); it++) | |
9b7e0f3d | 382 | mark(it->i); |
8d2eb09e | 383 | |
07777235 SM |
384 | /* Check kernels of existing states for size equivalence and for |
385 | unmarked items (similar to re2c's original algorithm): */ | |
386 | unsigned n = s->kernel->size(); | |
387 | for (state *t = first; t != NULL; t = t->next) | |
388 | { | |
389 | if (t->kernel->size() == n) | |
390 | { | |
391 | for (state_kernel::iterator it = t->kernel->begin(); | |
392 | it != t->kernel->end(); it++) | |
8d2eb09e SM |
393 | if (!marked(it->i)) |
394 | goto next_state; | |
07777235 | 395 | |
8d2eb09e SM |
396 | // TODOXXX check for existence of reordering r |
397 | answer = t; | |
398 | goto cleanup; | |
07777235 | 399 | } |
8d2eb09e SM |
400 | next_state: |
401 | ; | |
07777235 SM |
402 | } |
403 | ||
404 | cleanup: | |
405 | for (state_kernel::iterator it = s->kernel->begin(); | |
406 | it != s->kernel->end(); it++) | |
407 | unmark(it->i); | |
408 | ||
409 | return answer; | |
410 | } | |
411 | ||
412 | ||
413 | dfa::dfa (ins *i, int ntags, vector<string>& outcome_snippets) | |
414 | : orig_nfa(i), nstates(0), ntags(ntags), outcome_snippets(outcome_snippets) | |
415 | { | |
416 | /* Initialize empty linked list of states: */ | |
417 | first = last = NULL; | |
418 | ||
419 | ins *start = &i[0]; | |
9f232afb | 420 | state_kernel *initial_kernel = te_closure(make_kernel(start), ntags, true); |
248f3856 SM |
421 | state *initial = add_state(new state(initial_kernel)); |
422 | queue<state *> worklist; worklist.push(initial); | |
07777235 SM |
423 | |
424 | while (!worklist.empty()) | |
425 | { | |
248f3856 | 426 | state *curr = worklist.front(); worklist.pop(); |
07777235 SM |
427 | |
428 | vector<list<ins *> > edges(NUM_REAL_CHARS); | |
429 | ||
430 | /* Using the CHAR instructions in kernel, build the initial | |
431 | table of spans for curr. Also check for final states. */ | |
432 | ||
248f3856 SM |
433 | for (list<kernel_point>::iterator it = curr->kernel->begin(); |
434 | it != curr->kernel->end(); it++) | |
07777235 SM |
435 | { |
436 | if (it->i->i.tag == CHAR) | |
437 | { | |
438 | for (ins *j = &it->i[1]; j < (ins *) it->i->i.link; j++) | |
439 | edges[j->c.value].push_back((ins *) it->i->i.link); | |
440 | } | |
441 | else if (it->i->i.tag == ACCEPT) | |
442 | { | |
443 | /* Always prefer the highest numbered outcome: */ | |
444 | curr->accepts = true; | |
445 | curr->accept_outcome = max(it->i->i.param, curr->accept_outcome); | |
446 | } | |
447 | } | |
448 | ||
aaed700d | 449 | for (unsigned c = 0; c < NUM_REAL_CHARS; ) |
07777235 SM |
450 | { |
451 | list <ins *> e = edges[c]; | |
452 | assert (!e.empty()); // XXX: ensured by fail_re in stapregex_compile | |
453 | ||
454 | span s; | |
455 | ||
456 | s.lb = c; | |
457 | ||
458 | while (++c < NUM_REAL_CHARS && edges[c] == e) ; | |
459 | ||
248f3856 | 460 | s.ub = c - 1; |
07777235 SM |
461 | |
462 | s.reach_pairs = new state_kernel; | |
463 | ||
464 | for (list<ins *>::iterator it = e.begin(); | |
465 | it != e.end(); it++) | |
466 | add_kernel (s.reach_pairs, *it); | |
467 | ||
468 | curr->spans.push_back(s); | |
469 | } | |
470 | ||
471 | /* For each of the spans in curr, determine the reachable | |
472 | points assuming a character in the span. */ | |
473 | for (list<span>::iterator it = curr->spans.begin(); | |
474 | it != curr->spans.end(); it++) | |
475 | { | |
476 | state_kernel *reach_pairs = it->reach_pairs; | |
477 | ||
478 | /* Set up candidate target state: */ | |
479 | state_kernel *u_pairs = te_closure(reach_pairs, ntags); | |
480 | state *target = new state(u_pairs); | |
481 | tdfa_action c; | |
482 | ||
483 | /* Generate position-save commands for any map items | |
248f3856 | 484 | that do not appear in curr->kernel: */ |
07777235 SM |
485 | |
486 | set<map_item> all_items; | |
248f3856 SM |
487 | for (state_kernel::const_iterator jt = curr->kernel->begin(); |
488 | jt != curr->kernel->end(); jt++) | |
07777235 SM |
489 | for (list<map_item>::const_iterator kt = jt->map_items.begin(); |
490 | kt != jt->map_items.end(); jt++) | |
491 | all_items.insert(*kt); | |
492 | ||
493 | list<map_item> store_items; | |
494 | for (state_kernel::const_iterator jt = u_pairs->begin(); | |
495 | jt != u_pairs->end(); jt++) | |
496 | for (list<map_item>::const_iterator kt = jt->map_items.begin(); | |
497 | kt != jt->map_items.end(); kt++) | |
498 | if (all_items.find(*kt) == all_items.end()) | |
499 | store_items.push_back(*kt); | |
500 | ||
501 | for (list<map_item>::iterator jt = store_items.begin(); | |
502 | jt != store_items.end(); jt++) | |
503 | { | |
504 | // append m[i,n] <- <curr position> to c | |
505 | tdfa_insn insn; | |
506 | insn.to = *jt; | |
507 | insn.save_pos = true; | |
508 | c.push_back(insn); | |
509 | } | |
510 | ||
511 | /* If there is a state t_prime in states such that some | |
512 | sequence of reordering commands r produces t_prime | |
513 | from target, use t_prime as the target state, | |
514 | appending the reordering commands to c. */ | |
515 | state *t_prime = find_equivalent(target, c); | |
9b7e0f3d | 516 | if (t_prime != NULL) |
248f3856 | 517 | { |
07777235 SM |
518 | delete target; |
519 | } | |
520 | else | |
521 | { | |
522 | /* We need to actually add target to the dfa: */ | |
523 | t_prime = target; | |
524 | add_state(t_prime); | |
248f3856 | 525 | worklist.push(t_prime); |
07777235 SM |
526 | |
527 | if (t_prime->accepts) | |
528 | { | |
529 | // TODOXXX set the finisher of t_prime | |
530 | } | |
531 | } | |
532 | ||
533 | /* Set the transition: */ | |
534 | it->to = t_prime; | |
535 | it->action = c; | |
536 | } | |
537 | } | |
538 | } | |
539 | ||
540 | dfa::~dfa () | |
541 | { | |
542 | state * s; | |
543 | while ((s = first)) | |
544 | { | |
545 | first = s->next; | |
546 | delete s; | |
547 | } | |
548 | ||
549 | delete orig_nfa; | |
550 | } | |
551 | ||
552 | // ------------------------------------------------------------------------ | |
553 | ||
54f92ee4 SM |
554 | // TODOXXX add emission instructions for tag_ops |
555 | ||
556 | void | |
557 | span::emit_jump (translator_output *o, const dfa *d) const | |
558 | { | |
559 | o->newline () << "printf(\" --> GOTO yystate%d\\n\", " << to->label << ");"; | |
560 | ||
561 | // TODOXXX tags feature allows proper longest-match priority | |
562 | if (to->accepts) | |
563 | { | |
564 | emit_final(o, d); | |
565 | } | |
566 | else | |
567 | { | |
568 | o->newline () << "YYCURSOR++;"; | |
569 | o->newline () << "goto yystate" << to->label << ";"; | |
570 | } | |
571 | } | |
572 | ||
573 | /* Assuming the target DFA of the span is a final state, emit code to | |
574 | (TODOXXX) cleanup tags and exit with a final answer. */ | |
575 | void | |
576 | span::emit_final (translator_output *o, const dfa *d) const | |
577 | { | |
578 | assert (to->accepts); // XXX: must guarantee correct usage of emit_final() | |
579 | o->newline() << d->outcome_snippets[to->accept_outcome]; | |
580 | o->newline() << "goto yyfinish;"; | |
581 | } | |
582 | ||
583 | string c_char(char c) | |
584 | { | |
585 | stringstream o; | |
586 | o << "'"; | |
587 | print_escaped(o, c); | |
588 | o << "'"; | |
589 | return o.str(); | |
590 | } | |
591 | ||
592 | void | |
593 | state::emit (translator_output *o, const dfa *d) const | |
594 | { | |
595 | o->newline() << "yystate" << label << ": "; | |
596 | o->newline () << "printf(\"READ '%s' %c\", cur, *YYCURSOR);"; | |
597 | o->newline() << "switch (*YYCURSOR) {"; | |
598 | o->indent(1); | |
599 | for (list<span>::const_iterator it = spans.begin(); | |
600 | it != spans.end(); it++) | |
601 | { | |
602 | // If we see a '\0', go immediately into an accept state: | |
603 | if (it->lb == '\0') | |
604 | { | |
605 | o->newline() << "case " << c_char('\0') << ":"; | |
606 | it->emit_final(o, d); // TODOXXX extra function may be unneeded | |
607 | } | |
608 | ||
609 | // Emit labels to handle all the other elements of the span: | |
610 | for (unsigned c = max('\1', it->lb); c <= it->ub; c++) { | |
611 | o->newline() << "case " << c_char((char) c) << ":"; | |
612 | } | |
613 | it->emit_jump(o, d); | |
614 | ||
615 | // TODOXXX handle a 'default' set of characters for the largest span... | |
616 | // TODOXXX optimize by accepting before end of string whenever possible... (also necessary for proper first-matched-substring selection) | |
617 | } | |
618 | o->newline(-1) << "}"; | |
619 | } | |
620 | ||
07777235 SM |
621 | void |
622 | dfa::emit (translator_output *o) const | |
623 | { | |
54f92ee4 | 624 | #ifdef STAPREGEX_DEBUG_DFA |
07777235 | 625 | print(o); |
54f92ee4 SM |
626 | #else |
627 | o->newline() << "{"; | |
628 | o->newline(1); | |
629 | for (state *s = first; s; s = s->next) | |
630 | s->emit(o, this); | |
631 | o->newline() << "yyfinish: ;"; | |
632 | o->newline(-1) << "}"; | |
633 | #endif | |
07777235 SM |
634 | } |
635 | ||
636 | void | |
637 | dfa::emit_tagsave (translator_output *o, std::string tag_states, | |
638 | std::string tag_vals, std::string tag_count) const | |
639 | { | |
640 | // TODOXXX implement after testing the preceding algorithms | |
641 | } | |
642 | ||
643 | // ------------------------------------------------------------------------ | |
644 | ||
645 | std::ostream& | |
646 | operator << (std::ostream &o, const tdfa_action& a) | |
647 | { | |
648 | for (list<tdfa_insn>::const_iterator it = a.begin(); | |
649 | it != a.end(); it++) | |
650 | { | |
651 | if (it != a.begin()) o << "; "; | |
652 | ||
653 | o << "m[" << it->to.first << "," << it->to.second << "] <- "; | |
654 | ||
655 | if (it->save_pos) | |
656 | o << "p"; | |
657 | else | |
658 | o << "m[" << it->from.first << "," << it->from.second << "]"; | |
659 | } | |
660 | ||
661 | return o; | |
662 | } | |
663 | ||
248f3856 SM |
664 | std::ostream& |
665 | operator << (std::ostream &o, const arc_priority& p) | |
666 | { | |
667 | o << p.first << "/" << (1 << p.second); | |
668 | return o; | |
669 | } | |
670 | ||
07777235 SM |
671 | void |
672 | state::print (translator_output *o) const | |
673 | { | |
674 | o->line() << "state " << label; | |
675 | if (accepts) | |
676 | o->line() << " accepts " << accept_outcome; | |
677 | if (!finalizer.empty()) | |
678 | o->line() << " [" << finalizer << "]"; | |
679 | ||
680 | o->indent(1); | |
681 | for (list<span>::const_iterator it = spans.begin(); | |
682 | it != spans.end(); it++) | |
683 | { | |
248f3856 | 684 | o->newline() << "'"; |
07777235 | 685 | if (it->lb == it->ub) |
248f3856 SM |
686 | { |
687 | print_escaped (o->line(), it->lb); | |
688 | o->line() << " "; | |
689 | } | |
07777235 | 690 | else |
248f3856 SM |
691 | { |
692 | print_escaped (o->line(), it->lb); | |
693 | o->line() << "-"; | |
694 | print_escaped (o->line(), it->ub); | |
695 | } | |
07777235 | 696 | |
248f3856 | 697 | o->line() << "' -> " << it->to->label; |
07777235 SM |
698 | |
699 | if (!it->action.empty()) | |
700 | o->line() << " [" << it->action << "]"; | |
701 | } | |
702 | o->newline(-1); | |
703 | } | |
704 | ||
705 | void | |
706 | dfa::print (std::ostream& o) const | |
707 | { | |
708 | translator_output to(o); print(&to); | |
709 | } | |
710 | ||
711 | void | |
712 | dfa::print (translator_output *o) const | |
713 | { | |
248f3856 | 714 | o->newline(); |
07777235 SM |
715 | for (state *s = first; s; s = s->next) |
716 | { | |
717 | s->print(o); | |
718 | o->newline(); | |
719 | } | |
248f3856 | 720 | o->newline(); |
07777235 SM |
721 | } |
722 | ||
723 | std::ostream& | |
724 | operator << (std::ostream& o, const dfa& d) | |
725 | { | |
726 | d.print(o); | |
727 | return o; | |
728 | } | |
729 | ||
730 | std::ostream& | |
731 | operator << (std::ostream &o, const dfa *d) | |
732 | { | |
733 | o << *d; | |
734 | return o; | |
735 | } | |
736 | ||
737 | }; | |
738 | ||
739 | /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ |