From 22a0881ded653d91f140aa11ecdb87de339e5d93 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 29 Aug 2002 14:46:30 +0000 Subject: [PATCH] o Anchor support for the regex engine. --- lib/regex/matcher.c | 25 +++++++++++++++++++------ lib/regex/parse_rx.c | 18 ++++++++++++++++-- lib/regex/parse_rx.h | 12 +++++++++--- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/lib/regex/matcher.c b/lib/regex/matcher.c index 8a607463d..ccf6b168c 100644 --- a/lib/regex/matcher.c +++ b/lib/regex/matcher.c @@ -330,20 +330,33 @@ struct matcher *matcher_create(struct pool *mem, const char **patterns, int num) return NULL; } +static struct dfa_state * +_step_matcher(unsigned char c, struct dfa_state *cs, int *r) /* advance cs on input c; returns NULL when cs has no transition for c */ +{ + if (!(cs = cs->lookup[c])) + return NULL; + + if (cs->final && (cs->final > *r)) /* remember the largest non-zero final value reached so far */ + *r = cs->final; + + return cs; +} + int matcher_run(struct matcher *m, const char *b) { struct dfa_state *cs = m->start; int r = 0; - for (; *b; b++) { + if (!(cs = _step_matcher(HAT_CHAR, cs, &r))) /* feed the start-of-string marker before any byte of b */ + goto out; - if (!(cs = cs->lookup[(int) (unsigned char) *b])) - break; + for (; *b; b++) + if (!(cs = _step_matcher(*b, cs, &r))) + goto out; - if (cs->final && (cs->final > r)) - r = cs->final; - } + _step_matcher(DOLLAR_CHAR, cs, &r); /* feed the end-of-string marker once b is exhausted */ + out: /* subtract 1 to get back to zero index */ return r - 1; } diff --git a/lib/regex/parse_rx.c b/lib/regex/parse_rx.c index 7dcef82fb..108ec4178 100644 --- a/lib/regex/parse_rx.c +++ b/lib/regex/parse_rx.c @@ -22,6 +22,14 @@ struct parse_sp { /* scratch pad for the parsing process */ static struct rx_node *_or_term(struct parse_sp *ps); +static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr) /* produce a type 0 token whose charset holds only c and move the cursor past ptr */ +{ + ps->type = 0; + ps->cursor = ptr + 1; + bit_clear_all(ps->charset); + bit_set(ps->charset, c); +} + /* * Get the next token from the regular expression. * Returns: 1 success, 0 end of input, -1 error. 
@@ -125,12 +133,18 @@ static int _get_token(struct parse_sp *ps) case '+': case '?': case '|': - case '^': - case '$': ps->type = (int) *ptr; ps->cursor = ptr + 1; break; + case '^': + _single_char(ps, HAT_CHAR, ptr); + break; + + case '$': + _single_char(ps, DOLLAR_CHAR, ptr); + break; + case '.': /* The 'all but newline' character set */ ps->type = 0; diff --git a/lib/regex/parse_rx.h b/lib/regex/parse_rx.h index f75c99502..4112b1265 100644 --- a/lib/regex/parse_rx.h +++ b/lib/regex/parse_rx.h @@ -15,11 +15,17 @@ enum { PLUS, OR, QUEST, - CHARSET, - HAT, - DOLLAR + CHARSET }; +/* + * We're never going to be running the regex on non-printable + * chars, so we can use a couple of these chars to represent the + * start and end of a string. + */ +#define HAT_CHAR 0x2 +#define DOLLAR_CHAR 0x3 /* must differ from HAT_CHAR, otherwise '^' and '$' are the same input symbol */ + struct rx_node { int type; bitset_t charset; -- 2.43.5