From 5b39a977ca1faed8dae65cfde76f6714b5ebb936 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 21 Jul 2010 12:09:12 +0000 Subject: [PATCH] [REGEX] calculate dfa states on demand --- libdm/libdevmapper.h | 2 ++ libdm/regex/matcher.c | 72 +++++++++++++++++++++++++------------------ 2 files changed, 44 insertions(+), 30 deletions(-) diff --git a/libdm/libdevmapper.h b/libdm/libdevmapper.h index 655d66fe0..bfd985053 100644 --- a/libdm/libdevmapper.h +++ b/libdm/libdevmapper.h @@ -1012,6 +1012,8 @@ int dm_regex_match(struct dm_regex *regex, const char *s); * fingerprints are different, then the two dfas are certainly not * isomorphic. If two fingerprints _are_ the same then it's very likely * that the dfas are isomorphic. + * + * This function must be called before any matching is done. */ uint32_t dm_regex_fingerprint(struct dm_regex *regex); diff --git a/libdm/regex/matcher.c b/libdm/regex/matcher.c index fd9db785d..949b5f4eb 100644 --- a/libdm/regex/matcher.c +++ b/libdm/regex/matcher.c @@ -209,6 +209,7 @@ static struct dfa_state *_create_state_queue(struct dm_pool *mem, dfa->bits = dm_bitset_create(mem, bits[0]); /* first element is the size */ dm_bit_copy(dfa->bits, bits); dfa->next = 0; + dfa->final = -1; return dfa; } @@ -227,7 +228,7 @@ static void _calc_state(struct dm_regex *m, struct dfa_state *dfa, int a) set_bits = 1; } - if (set_bits) { /* FIXME: this is always true */ + if (set_bits) { struct dfa_state *tmp; struct dfa_state *ldfa = ttree_lookup(m->tt, m->bs + 1); if (!ldfa) { @@ -285,20 +286,28 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx) /* prime the queue */ m->h = m->t = _create_state_queue(m->scratch, dfa, rx->firstpos); m->dfa_copy = dm_bitset_create(m->scratch, m->num_charsets); + return 1; +} + +/* + * Forces all the dfa states to be calculated up front, ie. what + * _calc_states() used to do before we switched to calculating on demand. + */ +static void _force_states(struct dm_regex *m) +{ + int a; /* keep processing until there's nothing in the queue */ struct dfa_state *s; - while ((s = m->h)) { - /* pop state off front of the queue */ - m->h = m->h->next; + while ((s = m->h)) { + /* pop state off front of the queue */ + m->h = m->h->next; - /* iterate through all the inputs for this state */ - dm_bit_clear_all(m->bs); - for (a = 0; a < 256; a++) + /* iterate through all the inputs for this state */ + dm_bit_clear_all(m->bs); + for (a = 0; a < 256; a++) _calc_state(m, s, a); - } - - return 1; + } } struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns, @@ -308,17 +317,12 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns, int i; size_t len = 0; struct rx_node *rx; - struct dm_pool *scratch = dm_pool_create("regex matcher", 10 * 1024); struct dm_regex *m; + struct dm_pool *scratch = mem; - if (!scratch) + if (!(m = dm_pool_alloc(mem, sizeof(*m)))) return_NULL; - if (!(m = dm_pool_alloc(mem, sizeof(*m)))) { - dm_pool_destroy(scratch); - return_NULL; - } - memset(m, 0, sizeof(*m)); /* join the regexps together, delimiting with zero */ @@ -359,26 +363,31 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns, _create_bitsets(m); _calc_functions(m); _calc_states(m, rx); - dm_pool_destroy(scratch); - m->scratch = NULL; - return m; bad: - dm_pool_destroy(scratch); dm_pool_free(mem, m); return NULL; } -static struct dfa_state *_step_matcher(int c, struct dfa_state *cs, int *r) +static struct dfa_state *_step_matcher(struct dm_regex *m, int c, struct dfa_state *cs, int *r) { - if (!(cs = cs->lookup[(unsigned char) c])) + struct dfa_state *ns; + + if (!(ns = cs->lookup[(unsigned char) c])) + _calc_state(m, cs, c); + + if (!(ns = cs->lookup[(unsigned char) c])) return NULL; - if (cs->final && (cs->final > *r)) - *r = cs->final; + // yuck, we have to special case the target trans + if (ns->final == -1) + _calc_state(m, ns, TARGET_TRANS); - return cs; + if (ns->final && (ns->final > *r)) + *r = ns->final; + + return ns; } int dm_regex_match(struct dm_regex *regex, const char *s) @@ -386,14 +395,15 @@ int dm_regex_match(struct dm_regex *regex, const char *s) struct dfa_state *cs = regex->start; int r = 0; - if (!(cs = _step_matcher(HAT_CHAR, cs, &r))) + dm_bit_clear_all(regex->bs); + if (!(cs = _step_matcher(regex, HAT_CHAR, cs, &r))) goto out; for (; *s; s++) - if (!(cs = _step_matcher(*s, cs, &r))) + if (!(cs = _step_matcher(regex, *s, cs, &r))) goto out; - _step_matcher(DOLLAR_CHAR, cs, &r); + _step_matcher(regex, DOLLAR_CHAR, cs, &r); out: /* subtract 1 to get back to zero index */ @@ -496,7 +506,7 @@ static uint32_t fingerprint_(struct printer *p) struct dfa_state *node; while ((node = pop_node_(p))) { - result = combine_(result, node->final); + result = combine_(result, node->final < 0 ? 0 : node->final); for (c = 0; c < 256; c++) result = combine_(result, push_node_(p, node->lookup[c])); @@ -511,6 +521,8 @@ uint32_t dm_regex_fingerprint(struct dm_regex *regex) struct printer p; struct dm_pool *mem = dm_pool_create("regex fingerprint", 1024); + _force_states(regex); + assert(mem); p.mem = mem; p.pending = NULL; -- 2.43.5