]>
Commit | Line | Data |
---|---|---|
cd4882d7 SM |
1 | // -*- C++ -*- |
2 | // Copyright (C) 2012-2013 Red Hat Inc. | |
3 | // | |
4 | // This file is part of systemtap, and is free software. You can | |
5 | // redistribute it and/or modify it under the terms of the GNU General | |
6 | // Public License (GPL); either version 2, or (at your option) any | |
7 | // later version. | |
8 | // | |
9 | // --- | |
10 | // | |
11 | // This file incorporates code from the re2c project; please see | |
12 | // the file README.stapregex for details. | |
13 | ||
14 | #include "util.h" | |
15 | ||
16 | #include "stapregex-tree.h" | |
17 | #include "stapregex-parse.h" | |
18 | ||
19 | #include <cstdlib> | |
20 | #include <cstring> | |
21 | #include <string> | |
22 | ||
23 | using namespace std; | |
24 | ||
25 | namespace stapregex { | |
26 | ||
07777235 SM |
27 | // TODOXXX compress / eliminate / move to util |
28 | ||
29 | // void prtChOrHex(std::ostream& o, unsigned c) | |
30 | // { | |
31 | // if (eFlag) | |
32 | // { | |
33 | // prtHex(o, c); | |
34 | // } | |
35 | // else if ((c < 256u) && (isprint(c) || isspace(c))) | |
36 | // { | |
37 | // prtCh(o, c); | |
38 | // } | |
39 | // else | |
40 | // { | |
41 | // prtHex(o, c); | |
42 | // } | |
43 | // } | |
44 | ||
45 | // void prtHex(std::ostream& o, unsigned c) | |
46 | // { | |
47 | // int oc = (int)(c); | |
48 | ||
49 | // if (re2c::uFlag) | |
50 | // { | |
51 | // o << "0x" | |
52 | // << hexCh(oc >> 28) | |
53 | // << hexCh(oc >> 24) | |
54 | // << hexCh(oc >> 20) | |
55 | // << hexCh(oc >> 16) | |
56 | // << hexCh(oc >> 12) | |
57 | // << hexCh(oc >> 8) | |
58 | // << hexCh(oc >> 4) | |
59 | // << hexCh(oc); | |
60 | // } | |
61 | // else if (re2c::wFlag) | |
62 | // { | |
63 | // o << "0x" | |
64 | // << hexCh(oc >> 12) | |
65 | // << hexCh(oc >> 8) | |
66 | // << hexCh(oc >> 4) | |
67 | // << hexCh(oc); | |
68 | // } | |
69 | // else | |
70 | // { | |
71 | // o << "0x" | |
72 | // << hexCh(oc >> 4) | |
73 | // << hexCh(oc); | |
74 | // } | |
75 | // } | |
76 | ||
// Return the ASCII octal digit ('0'..'7') for the low three bits of C.
char octCh(unsigned c)
{
  return static_cast<char>('0' + c % 8);
}

// Write character code C to O in a form suitable for a C string literal:
// quotes, backslash and the common control characters use their symbolic
// escapes, other printable characters below 256 are written verbatim, and
// everything else is written as a backslash followed by three octal digits.
//
// All arithmetic is done on the unsigned value, so isprint() is never
// handed a negative argument and the octal digits are never computed by
// dividing a negative number.
void prtCh(std::ostream& o, unsigned c)
{
  switch (c)
    {
    case '\'':
      o << "\\'";
      break;

    case '"':
      o << "\\\"";
      break;

    case '\n':
      o << "\\n";
      break;

    case '\t':
      o << "\\t";
      break;

    case '\v':
      o << "\\v";
      break;

    case '\b':
      o << "\\b";
      break;

    case '\r':
      o << "\\r";
      break;

    case '\f':
      o << "\\f";
      break;

    case '\a':
      o << "\\a";
      break;

    case '\\':
      o << "\\\\";
      break;

    default:
      if ((c < 256u) && isprint(static_cast<int>(c)))
        {
          o << static_cast<char>(c);
        }
      else
        {
          o << '\\' << octCh(c / 64) << octCh(c / 8) << octCh(c);
        }
    }
}

// Write the single byte C to O in escaped form (see prtCh).
//
// The byte is widened through unsigned char first: where plain char is
// signed, a high-bit byte would otherwise sign-extend to a huge unsigned
// value and be printed with the wrong octal digits.
void print_escaped(std::ostream& o, char c)
{
  prtCh(o, static_cast<unsigned char>(c));
}
145 | ||
146 | // ------------------------------------------------------------------------ | |
147 | ||
c92d3b42 | 148 | cursor::cursor() : input(NULL), do_unescape(false), pos(~0), |
82c6d474 | 149 | last_pos(~0), finished(false), next_c(0), last_c(0) {} |
e5fcd199 | 150 | |
28a27de2 | 151 | cursor::cursor(const std::string *input, bool do_unescape) |
82c6d474 | 152 | : input(input), do_unescape(do_unescape), pos(0), last_pos(0), finished(false) |
cd4882d7 SM |
153 | { |
154 | next_c = 0; last_c = 0; | |
155 | finished = ( pos >= input->length() ); | |
156 | } | |
157 | ||
158 | char | |
159 | cursor::next () | |
160 | { | |
161 | if (! next_c && finished) | |
e5fcd199 | 162 | throw regex_error(_("unexpected end of regex"), pos); |
cd4882d7 | 163 | if (! next_c) |
e5fcd199 | 164 | get_unescaped(); |
cd4882d7 SM |
165 | |
166 | last_c = next_c; | |
167 | // advance by zeroing next_c | |
168 | next_c = 0; | |
169 | ||
170 | return last_c; | |
171 | } | |
172 | ||
173 | char | |
174 | cursor::peek () | |
175 | { | |
176 | if (! next_c && ! finished) | |
177 | get_unescaped(); | |
178 | ||
179 | // don't advance by zeroing next_c | |
180 | last_c = next_c; | |
181 | ||
182 | return next_c; | |
183 | } | |
184 | ||
40fd16cf SM |
185 | bool |
186 | cursor::has (unsigned n) | |
187 | { | |
e5fcd199 | 188 | return ( pos <= input->length() - n ); |
40fd16cf SM |
189 | } |
190 | ||
cd4882d7 SM |
191 | /* Systemtap doesn't unescape string literals for us, presuming to |
192 | pass the backslashes intact to a C compiler; hence we need to do | |
193 | our own unescaping here. | |
194 | ||
195 | This functionality needs to be handled as part of cursor, in order | |
196 | to correctly retain the original positions in the string when doing | |
197 | error reporting. */ | |
198 | void | |
199 | cursor::get_unescaped () | |
200 | { | |
201 | static const char *hex = "0123456789abcdef"; | |
202 | static const char *oct = "01234567"; | |
203 | ||
204 | last_pos = pos; | |
205 | char c = (*input)[pos]; | |
206 | ||
207 | if (c != '\\' || !do_unescape) | |
208 | { | |
209 | next_c = c; | |
210 | pos++; | |
248f3856 | 211 | finished = ( pos >= input->length() ); |
cd4882d7 SM |
212 | return; |
213 | } | |
214 | ||
40fd16cf SM |
215 | pos++; |
216 | ||
217 | /* Check for improper string end: */ | |
218 | if (pos >= input->length()) | |
219 | throw regex_error(_("unexpected end of regex"), pos); | |
220 | ||
cd4882d7 SM |
221 | /* The logic is based on re2c's Scanner::unescape() method; |
222 | the set of accepted escape codes should correspond to | |
223 | lexer::scan() in parse.cxx. */ | |
40fd16cf | 224 | c = (*input)[pos]; |
cd4882d7 SM |
225 | switch (c) |
226 | { | |
227 | case 'a': c = '\a'; break; | |
228 | case 'b': c = '\b'; break; | |
229 | case 't': c = '\t'; break; | |
230 | case 'n': c = '\n'; break; | |
231 | case 'v': c = '\v'; break; | |
232 | case 'f': c = '\f'; break; | |
233 | case 'r': c = '\r'; break; | |
234 | ||
235 | case 'x': | |
e5fcd199 | 236 | { |
cd4882d7 SM |
237 | if (pos >= input->length() - 2) |
238 | throw regex_error(_("two hex digits required in escape sequence"), pos); | |
239 | ||
240 | const char *d1 = strchr(hex, tolower((*input)[pos+1])); | |
241 | const char *d2 = strchr(hex, tolower((*input)[pos+2])); | |
242 | ||
243 | if (!d1 || !d2) | |
244 | throw regex_error(_("two hex digits required in escape sequence"), pos + (d1 ? 1 : 2)); | |
245 | ||
e5fcd199 | 246 | c = (char)((d1-hex) << 4) + (char)(d2-hex); |
cd4882d7 SM |
247 | pos += 2; // skip two chars more than usual |
248 | break; | |
e5fcd199 | 249 | } |
cd4882d7 SM |
250 | case '4' ... '7': |
251 | // XXX: perhaps perform error recovery (slurp 3 octal chars)? | |
252 | throw regex_error(_("octal escape sequence out of range"), pos); | |
253 | ||
254 | case '0' ... '3': | |
e5fcd199 | 255 | { |
cd4882d7 SM |
256 | if (pos >= input->length() - 2) |
257 | throw regex_error(_("three octal digits required in escape sequence"), pos); | |
258 | ||
259 | const char *d0 = strchr(oct, (*input)[pos]); | |
260 | const char *d1 = strchr(oct, (*input)[pos+1]); | |
261 | const char *d2 = strchr(oct, (*input)[pos+2]); | |
262 | ||
263 | if (!d0 || !d1 || !d2) | |
264 | throw regex_error(_("three octal digits required in escape sequence"), pos + (d1 ? 1 : 2)); | |
265 | ||
266 | c = (char)((d0-oct) << 6) + (char)((d1-oct) << 3) + (char)(d2-oct); | |
267 | pos += 2; // skip two chars more than usual | |
268 | break; | |
e5fcd199 | 269 | } |
cd4882d7 SM |
270 | default: |
271 | // do nothing; this removes the backslash from c | |
e5fcd199 | 272 | ; |
cd4882d7 SM |
273 | } |
274 | ||
275 | next_c = c; | |
276 | pos++; | |
277 | finished = ( pos >= input->length() ); | |
278 | } | |
279 | ||
280 | // ------------------------------------------------------------------------ | |
281 | ||
282 | regexp * | |
283 | regex_parser::parse (bool do_tag) | |
284 | { | |
112e9e2b | 285 | cur = cursor(&input, do_unescape); |
e5fcd199 | 286 | num_tags = 0; this->do_tag = do_tag; |
cd4882d7 | 287 | |
e5fcd199 | 288 | regexp *result = parse_expr (); |
cd4882d7 SM |
289 | |
290 | // PR15065 glom appropriate tag_ops onto the expr (subexpression 0) | |
291 | if (do_tag) { | |
292 | result = new cat_op(new tag_op(num_tags++), result); | |
293 | result = new cat_op(result, new tag_op(num_tags++)); | |
294 | } | |
295 | ||
296 | if (! cur.finished) | |
297 | { | |
298 | char c = cur.peek (); | |
299 | if (c == ')') | |
e5fcd199 | 300 | parse_error (_("unbalanced ')'"), cur.pos); |
cd4882d7 SM |
301 | else |
302 | // This should not be possible: | |
e5fcd199 | 303 | parse_error ("BUG -- regex parse failed to finish for unknown reasons", cur.pos); |
cd4882d7 SM |
304 | } |
305 | ||
306 | // PR15065 store num_tags in result | |
e5fcd199 | 307 | result->num_tags = num_tags; |
cd4882d7 SM |
308 | return result; |
309 | } | |
310 | ||
311 | bool | |
312 | regex_parser::isspecial (char c) | |
313 | { | |
314 | return ( c == '.' || c == '[' || c == '{' || c == '(' || c == ')' | |
315 | || c == '\\' || c == '*' || c == '+' || c == '?' || c == '|' | |
316 | || c == '^' || c == '$' ); | |
317 | } | |
318 | ||
319 | void | |
320 | regex_parser::expect (char expected) | |
321 | { | |
322 | char c = 0; | |
323 | try { | |
324 | c = cur.next (); | |
325 | } catch (const regex_error &e) { | |
326 | parse_error (_F("expected %c, found end of regex", expected)); | |
327 | } | |
328 | ||
329 | if (c != expected) | |
330 | parse_error (_F("expected %c, found %c", expected, c)); | |
331 | } | |
332 | ||
333 | void | |
334 | regex_parser::parse_error (const string& msg, unsigned pos) | |
335 | { | |
336 | throw regex_error(msg, pos); | |
337 | } | |
338 | ||
339 | void | |
340 | regex_parser::parse_error (const string& msg) | |
341 | { | |
342 | parse_error (msg, cur.last_pos); | |
343 | } | |
344 | ||
345 | // ------------------------------------------------------------------------ | |
346 | ||
347 | regexp * | |
348 | regex_parser::parse_expr () | |
349 | { | |
350 | regexp *result = parse_term (); | |
351 | ||
352 | char c = cur.peek (); | |
353 | while (c && c == '|') | |
354 | { | |
40fd16cf | 355 | cur.next (); |
cd4882d7 SM |
356 | regexp *alt = parse_term (); |
357 | result = make_alt (result, alt); | |
358 | c = cur.peek (); | |
359 | } | |
360 | ||
361 | return result; | |
362 | } | |
363 | ||
364 | regexp * | |
365 | regex_parser::parse_term () | |
366 | { | |
367 | regexp *result = parse_factor (); | |
368 | ||
369 | char c = cur.peek (); | |
370 | while (c && c != '|' && c != ')') | |
371 | { | |
372 | regexp *next = parse_factor (); | |
373 | result = new cat_op(result, next); | |
374 | c = cur.peek (); | |
375 | } | |
376 | ||
377 | return result; | |
378 | } | |
379 | ||
380 | regexp * | |
381 | regex_parser::parse_factor () | |
382 | { | |
383 | regexp *result; | |
384 | regexp *old_result = NULL; | |
385 | ||
386 | char c = cur.peek (); | |
387 | if (! c || c == '|' || c == ')') | |
388 | { | |
389 | result = new null_op; | |
390 | return result; | |
391 | } | |
392 | else if (c == '*' || c == '+' || c == '?' || c == '{') | |
393 | { | |
394 | parse_error(_F("unexpected '%c'", c)); | |
395 | } | |
396 | ||
397 | if (isspecial (c) && c != '\\') | |
398 | cur.next (); // c is guaranteed to be swallowed | |
399 | ||
400 | if (c == '.') | |
401 | { | |
402 | result = make_dot (); | |
403 | } | |
404 | else if (c == '[') | |
405 | { | |
406 | result = parse_char_range (); | |
407 | expect (']'); | |
408 | } | |
409 | else if (c == '(') | |
410 | { | |
411 | result = parse_expr (); | |
412 | ||
413 | // PR15065 glom appropriate tag_ops onto the expr | |
414 | if (do_tag) { | |
415 | result = new cat_op(new tag_op(num_tags++), result); | |
416 | result = new cat_op(result, new tag_op(num_tags++)); | |
d6833508 SM |
417 | } else { |
418 | // XXX: workaround for certain error checking test cases which | |
419 | // would otherwise produce divergent behaviour | |
420 | // (e.g. "^*" vs "(^)*"). | |
421 | result = new cat_op(result, new null_op); | |
cd4882d7 SM |
422 | } |
423 | ||
424 | expect (')'); | |
425 | } | |
426 | else if (c == '^' || c == '$') | |
427 | { | |
428 | result = new anchor_op(c); | |
429 | } | |
430 | else // escaped or ordinary character -- not yet swallowed | |
431 | { | |
432 | string accumulate; | |
433 | char d = 0; | |
434 | ||
435 | while (c && ( ! isspecial (c) || c == '\\' )) | |
436 | { | |
437 | if (c == '\\') | |
438 | { | |
439 | cur.next (); | |
440 | c = cur.peek (); | |
441 | } | |
442 | ||
443 | cur.next (); | |
444 | d = cur.peek (); | |
445 | ||
446 | /* if we end in a closure, it should only govern the last character */ | |
447 | if (d == '*' || d == '+' || d == '?' || d == '{') | |
448 | { | |
449 | /* save the last character */ | |
450 | d = c; break; | |
451 | } | |
452 | ||
453 | accumulate.push_back (c); | |
454 | c = d; d = 0; | |
455 | } | |
456 | ||
457 | result = str_to_re (accumulate); | |
458 | ||
459 | /* separately deal with the last character before a closure */ | |
460 | if (d != 0) { | |
461 | old_result = result; /* will add it back outside closure at the end */ | |
e5fcd199 | 462 | result = str_to_re (string(1,d)); |
cd4882d7 SM |
463 | } |
464 | } | |
465 | ||
466 | /* parse closures or other postfix operators */ | |
467 | c = cur.peek (); | |
468 | while (c == '*' || c == '+' || c == '?' || c == '{') | |
469 | { | |
470 | cur.next (); | |
471 | ||
472 | /* closure-type operators applied to $^ are definitely not kosher */ | |
473 | if (result->type_of() == "anchor_op") | |
474 | { | |
475 | parse_error(_F("postfix closure '%c' applied to anchoring operator", c)); | |
476 | } | |
477 | ||
478 | if (c == '*') | |
479 | { | |
480 | result = make_alt (new close_op(result), new null_op); | |
481 | } | |
482 | else if (c == '+') | |
483 | { | |
484 | result = new close_op(result); | |
485 | } | |
486 | else if (c == '?') | |
487 | { | |
488 | result = make_alt (result, new null_op); | |
489 | } | |
490 | else if (c == '{') | |
491 | { | |
492 | int minsize = parse_number (); | |
493 | int maxsize = -1; | |
494 | ||
495 | c = cur.next (); | |
496 | if (c == ',') | |
497 | { | |
498 | c = cur.peek (); | |
499 | if (c == '}') | |
500 | { | |
501 | cur.next (); | |
502 | maxsize = -1; | |
503 | } | |
504 | else if (isdigit (c)) | |
505 | { | |
506 | maxsize = parse_number (); | |
507 | expect ('}'); | |
508 | } | |
509 | else | |
510 | parse_error(_("expected '}' or number"), cur.pos); | |
511 | } | |
512 | else if (c == '}') | |
513 | { | |
514 | maxsize = minsize; | |
515 | } | |
516 | else | |
517 | parse_error(_("expected ',' or '}'")); | |
518 | ||
519 | /* optimize {0,0}, {0,} and {1,} */ | |
e5fcd199 | 520 | if (!do_tag && minsize == 0 && maxsize == 0) |
cd4882d7 | 521 | { |
e5fcd199 SM |
522 | // XXX: this optimization is only used when |
523 | // subexpression-extraction is disabled | |
cd4882d7 SM |
524 | delete result; |
525 | result = new null_op; | |
526 | } | |
527 | else if (minsize == 0 && maxsize == -1) | |
528 | { | |
529 | result = make_alt (new close_op(result), new null_op); | |
530 | } | |
531 | else if (minsize == 1 && maxsize == -1) | |
532 | { | |
533 | result = new close_op(result); | |
534 | } | |
535 | else | |
536 | { | |
537 | result = new closev_op(result, minsize, maxsize); | |
538 | } | |
539 | } | |
540 | ||
541 | c = cur.peek (); | |
542 | } | |
543 | ||
544 | if (old_result) | |
545 | result = new cat_op(old_result, result); | |
546 | ||
547 | return result; | |
548 | } | |
549 | ||
cd4882d7 SM |
550 | regexp * |
551 | regex_parser::parse_char_range () | |
552 | { | |
40fd16cf | 553 | range *ran = NULL; |
cd4882d7 | 554 | |
40fd16cf | 555 | // check for inversion |
cd4882d7 SM |
556 | bool inv = false; |
557 | char c = cur.peek (); | |
558 | if (c == '^') | |
559 | { | |
560 | inv = true; | |
561 | cur.next (); | |
cd4882d7 SM |
562 | } |
563 | ||
cd4882d7 SM |
564 | for (;;) |
565 | { | |
40fd16cf | 566 | // break on string end whenever we encounter it |
e5fcd199 | 567 | if (cur.finished) parse_error(_("unclosed character class")); // TODOXXX doublecheck that this is triggered correctly |
cd4882d7 | 568 | |
e5fcd199 | 569 | range *add = stapregex_getrange (cur); |
28a27de2 SM |
570 | range *new_ran = ( ran != NULL ? range_union(ran, add) : add ); |
571 | delete ran; if (new_ran != add) delete add; | |
572 | ran = new_ran; | |
cd4882d7 | 573 | |
40fd16cf | 574 | // break on ']' (except at the start of the class) |
248f3856 | 575 | c = cur.peek (); |
40fd16cf | 576 | if (c == ']') |
248f3856 | 577 | break; |
cd4882d7 SM |
578 | } |
579 | ||
e5fcd199 SM |
580 | if (inv) |
581 | { | |
28a27de2 SM |
582 | range *new_ran = range_invert(ran); |
583 | delete ran; | |
584 | ran = new_ran; | |
e5fcd199 SM |
585 | } |
586 | ||
40fd16cf SM |
587 | if (ran == NULL) |
588 | return new null_op; | |
589 | ||
590 | return new match_op(ran); | |
cd4882d7 SM |
591 | } |
592 | ||
593 | unsigned | |
594 | regex_parser::parse_number () | |
595 | { | |
596 | string digits; | |
597 | ||
598 | char c = cur.peek (); | |
599 | while (c && isdigit (c)) | |
600 | { | |
601 | cur.next (); | |
602 | digits.push_back (c); | |
603 | c = cur.peek (); | |
604 | } | |
605 | ||
606 | if (digits == "") parse_error(_("expected number"), cur.pos); | |
607 | ||
608 | char *endptr = NULL; | |
609 | int val = strtol (digits.c_str (), &endptr, 10); | |
610 | ||
611 | if (*endptr != '\0' || errno == ERANGE) // paranoid error checking | |
612 | parse_error(_F("could not parse number %s", digits.c_str()), cur.pos); | |
613 | #define MAX_DFA_REPETITIONS 12345 | |
614 | if (val >= MAX_DFA_REPETITIONS) // XXX: is there a more sensible max size? | |
615 | parse_error(_F("%s is too large", digits.c_str()), cur.pos); | |
616 | ||
617 | return atoi (digits.c_str ()); | |
618 | } | |
619 | ||
cd4882d7 SM |
620 | // ------------------------------------------------------------------------ |
621 | ||
e5fcd199 | 622 | std::map<std::string, range *> named_char_classes; |
40fd16cf SM |
623 | |
624 | range * | |
625 | named_char_class (const string& name) | |
cd4882d7 | 626 | { |
40fd16cf SM |
627 | // static initialization of table |
628 | if (named_char_classes.empty()) | |
629 | { | |
630 | // original source for these is http://www.regular-expressions.info/posixbrackets.html | |
631 | // also checked against (intended to match) the c stdlib isFOO() chr class functions | |
632 | named_char_classes["alpha"] = new range("A-Za-z"); | |
633 | named_char_classes["alnum"] = new range("A-Za-z0-9"); | |
634 | named_char_classes["blank"] = new range(" \t"); | |
635 | named_char_classes["cntrl"] = new range("\x01-\x1F\x7F"); // XXX: include \x00 in range? -- probably not! | |
636 | named_char_classes["d"] = named_char_classes["digit"] = new range("0-9"); | |
637 | named_char_classes["xdigit"] = new range("0-9a-fA-F"); | |
638 | named_char_classes["graph"] = new range("\x21-\x7E"); | |
639 | named_char_classes["l"] = named_char_classes["lower"] = new range("a-z"); | |
640 | named_char_classes["print"] = new range("\x20-\x7E"); | |
d6833508 | 641 | named_char_classes["punct"] = new range("!\"#$%&'()*+,./:;<=>?@[\\]^_`{|}~-"); |
40fd16cf SM |
642 | named_char_classes["s"] = named_char_classes["space"] = new range(" \t\r\n\v\f"); |
643 | named_char_classes["u"] = named_char_classes["upper"] = new range("A-Z"); | |
644 | } | |
cd4882d7 | 645 | |
40fd16cf | 646 | if (named_char_classes.find(name) == named_char_classes.end()) |
cd4882d7 | 647 | { |
e5fcd199 | 648 | throw regex_error (_F("unknown character class '%s'", name.c_str())); // XXX: position unknown |
40fd16cf | 649 | } |
cd4882d7 | 650 | |
40fd16cf SM |
651 | return new range(*named_char_classes[name]); |
652 | } | |
653 | ||
654 | range * | |
655 | stapregex_getrange (cursor& cur) | |
656 | { | |
657 | char c = cur.peek (); | |
40fd16cf SM |
658 | |
659 | if (c == '\\') | |
660 | { | |
661 | // Grab escaped char regardless of what it is. | |
662 | cur.next (); c = cur.peek (); cur.next (); | |
663 | } | |
664 | else if (c == '[') | |
665 | { | |
666 | // Check for '[:' digraph. | |
667 | char old_c = c; cur.next (); c = cur.peek (); | |
668 | ||
669 | if (c == ':') | |
cd4882d7 | 670 | { |
248f3856 | 671 | cur.next (); c = cur.peek (); // skip ':' |
40fd16cf | 672 | string charclass; |
cd4882d7 | 673 | |
40fd16cf SM |
674 | for (;;) |
675 | { | |
676 | if (cur.finished) | |
677 | throw regex_error (_F("unclosed character class '[:%s'", charclass.c_str()), cur.pos); | |
cd4882d7 | 678 | |
e5fcd199 | 679 | if (cur.has(2) && c == ':' && (*cur.input)[cur.pos] == ']') |
40fd16cf SM |
680 | { |
681 | cur.next (); cur.next (); // skip ':]' | |
682 | return named_char_class(charclass); | |
683 | } | |
684 | ||
685 | charclass.push_back(c); cur.next(); c = cur.peek(); | |
686 | } | |
cd4882d7 | 687 | } |
40fd16cf SM |
688 | else |
689 | { | |
690 | // Backtrack; fall through to processing c. | |
691 | c = old_c; | |
692 | } | |
693 | } | |
694 | else | |
695 | cur.next (); | |
696 | ||
697 | char lb = c, ub; | |
698 | ||
d6833508 | 699 | if (!cur.has(2) || cur.peek () != '-' || (*cur.input)[cur.pos] == ']') |
40fd16cf SM |
700 | { |
701 | ub = lb; | |
cd4882d7 | 702 | } |
40fd16cf SM |
703 | else |
704 | { | |
705 | cur.next (); // skip '-' | |
706 | ub = cur.peek (); | |
707 | ||
708 | if (ub < lb) | |
709 | throw regex_error (_F("Inverted character range %c-%c", lb, ub), cur.pos); | |
710 | ||
711 | cur.next (); | |
712 | } | |
713 | ||
714 | return new range(lb, ub); | |
cd4882d7 SM |
715 | } |
716 | ||
40fd16cf SM |
717 | }; |
718 | ||
cd4882d7 | 719 | /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ |