]> sourceware.org Git - lvm2.git/commitdiff
[REGEX] reduce the number of charset nodes that are produced
authorJoe Thornber <thornber@redhat.com>
Wed, 21 Jul 2010 11:58:49 +0000 (11:58 +0000)
committerJoe Thornber <thornber@redhat.com>
Wed, 21 Jul 2010 11:58:49 +0000 (11:58 +0000)
libdm/regex/matcher.c
libdm/regex/parse_rx.c
libdm/regex/parse_rx.h

index d8d856c84385c5b2546b69213da5267901ecb504..07ba9e4a3f9a0b5c03317ea98b84e3ea401a604c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
  *
  * This file is part of the device-mapper userspace tools.
@@ -32,8 +32,11 @@ struct state_queue {
 struct dm_regex {              /* Instance variables for the lexer */
        struct dfa_state *start;
        unsigned num_nodes;
+        unsigned num_charsets;
        int nodes_entered;
        struct rx_node **nodes;
+        int charsets_entered;
+        struct rx_node **charsets;
        struct dm_pool *scratch, *mem;
 };
 
@@ -50,6 +53,33 @@ static int _count_nodes(struct rx_node *rx)
        return r;
 }
 
+static unsigned _count_charsets(struct rx_node *rx)
+{
+        if (rx->type == CHARSET)
+                return 1;
+
+        return (rx->left ? _count_charsets(rx->left) : 0) +
+                (rx->right ? _count_charsets(rx->right) : 0);
+}
+
+static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
+{
+        if (rx->type == CHARSET)
+                rx->charset_index = (*i)++;
+        else {
+                if (rx->left)
+                        _enumerate_charsets_internal(rx->left, i);
+                if (rx->right)
+                        _enumerate_charsets_internal(rx->right, i);
+        }
+}
+
+static void _enumerate_charsets(struct rx_node *rx)
+{
+        unsigned i = 0;
+        _enumerate_charsets_internal(rx, &i);
+}
+
 static void _fill_table(struct dm_regex *m, struct rx_node *rx)
 {
        assert((rx->type != OR) || (rx->left && rx->right));
@@ -61,6 +91,8 @@ static void _fill_table(struct dm_regex *m, struct rx_node *rx)
                _fill_table(m, rx->right);
 
        m->nodes[m->nodes_entered++] = rx;
+        if (rx->type == CHARSET)
+                m->charsets[m->charsets_entered++] = rx;
 }
 
 static void _create_bitsets(struct dm_regex *m)
@@ -69,9 +101,9 @@ static void _create_bitsets(struct dm_regex *m)
 
        for (i = 0; i < m->num_nodes; i++) {
                struct rx_node *n = m->nodes[i];
-               n->firstpos = dm_bitset_create(m->scratch, m->num_nodes);
-               n->lastpos = dm_bitset_create(m->scratch, m->num_nodes);
-               n->followpos = dm_bitset_create(m->scratch, m->num_nodes);
+               n->firstpos = dm_bitset_create(m->scratch, m->num_charsets);
+               n->lastpos = dm_bitset_create(m->scratch, m->num_charsets);
+               n->followpos = dm_bitset_create(m->scratch, m->num_charsets);
        }
 }
 
@@ -85,7 +117,7 @@ static void _calc_functions(struct dm_regex *m)
                c1 = rx->left;
                c2 = rx->right;
 
-               if (dm_bit(rx->charset, TARGET_TRANS))
+               if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
                        rx->final = final++;
 
                switch (rx->type) {
@@ -125,8 +157,8 @@ static void _calc_functions(struct dm_regex *m)
                        break;
 
                case CHARSET:
-                       dm_bit_set(rx->firstpos, i);
-                       dm_bit_set(rx->lastpos, i);
+                       dm_bit_set(rx->firstpos, rx->charset_index);
+                       dm_bit_set(rx->lastpos, rx->charset_index);
                        rx->nullable = 0;
                        break;
 
@@ -141,23 +173,21 @@ static void _calc_functions(struct dm_regex *m)
                 */
                switch (rx->type) {
                case CAT:
-                       for (j = 0; j < m->num_nodes; j++) {
-                               if (dm_bit(c1->lastpos, j)) {
-                                       struct rx_node *n = m->nodes[j];
+                       for (j = 0; j < m->num_charsets; j++) {
+                                struct rx_node *n = m->charsets[j];
+                               if (dm_bit(c1->lastpos, j))
                                        dm_bit_union(n->followpos,
-                                                 n->followpos, c2->firstpos);
-                               }
+                                                     n->followpos, c2->firstpos);
                        }
                        break;
 
                case PLUS:
                case STAR:
-                       for (j = 0; j < m->num_nodes; j++) {
-                               if (dm_bit(rx->lastpos, j)) {
-                                       struct rx_node *n = m->nodes[j];
+                       for (j = 0; j < m->num_charsets; j++) {
+                                struct rx_node *n = m->charsets[j];
+                               if (dm_bit(rx->lastpos, j))
                                        dm_bit_union(n->followpos,
-                                                 n->followpos, rx->firstpos);
-                               }
+                                                     n->followpos, rx->firstpos);
                        }
                        break;
                }
@@ -189,7 +219,7 @@ static struct state_queue *_create_state_queue(struct dm_pool *mem,
 
 static int _calc_states(struct dm_regex *m, struct rx_node *rx)
 {
-       unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1;
+       unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
        struct ttree *tt = ttree_create(m->scratch, iwidth);
        struct state_queue *h, *t, *tmp;
        struct dfa_state *dfa, *ldfa;
@@ -199,9 +229,26 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
        if (!tt)
                return_0;
 
-       if (!(bs = dm_bitset_create(m->scratch, m->num_nodes)))
+       if (!(bs = dm_bitset_create(m->scratch, m->num_charsets)))
                return_0;
 
+        /* build some char maps */
+        dm_bitset_t charmap[256];
+        for (a = 0; a < 256; a++) {
+                charmap[a] = dm_bitset_create(m->scratch, m->num_charsets);
+                if (!charmap[a])
+                        return_0;
+        }
+
+        for (i = 0; i < m->num_nodes; i++) {
+                struct rx_node *n = m->nodes[i];
+                if (n->type == CHARSET) {
+                        for (a = dm_bit_get_first(n->charset);
+                             a >= 0; a = dm_bit_get_next(n->charset, a))
+                                dm_bit_set(charmap[a], n->charset_index);
+                }
+        }
+
        /* create first state */
        dfa = _create_dfa_state(m->mem);
        m->start = dfa;
@@ -209,8 +256,9 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
 
        /* prime the queue */
        h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
+        dm_bitset_t dfa_copy = dm_bitset_create(m->scratch, m->num_charsets);
        while (h) {
-               int elems, j, idx[h->bits[0]];
+               int elems, idx[h->bits[0]];
 
                /* pop state off front of the queue */
                dfa = h->s;
@@ -227,18 +275,18 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx)
                        idx[elems++] = i;
 
                for (a = 0; a < 256; a++) {
+                        dm_bit_and(dfa_copy, charmap[a], dfa_bits);
+
                        /* iterate through all the states in firstpos */
-                       for (j = 0; j < elems; j++) {
-                               i = idx[j];
-                               if (dm_bit(m->nodes[i]->charset, a)) {
-                                       if (a == TARGET_TRANS)
-                                               dfa->final = m->nodes[i]->final;
-
-                                       dm_bit_union(bs, bs,
-                                                 m->nodes[i]->followpos);
-                                       set_bits = 1;
-                               }
-                       }
+                        for (i = dm_bit_get_first(dfa_copy);
+                             i >= 0; i = dm_bit_get_next(dfa_copy, i)) {
+                                if (a == TARGET_TRANS)
+                                        dfa->final = m->charsets[i]->final;
+
+                                dm_bit_union(bs, bs,
+                                             m->charsets[i]->followpos);
+                                set_bits = 1;
+                        }
 
                        if (set_bits) {
                                ldfa = ttree_lookup(tt, bs + 1);
@@ -314,11 +362,16 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
        m->mem = mem;
        m->scratch = scratch;
        m->num_nodes = _count_nodes(rx);
+        m->num_charsets = _count_charsets(rx);
+        _enumerate_charsets(rx);
        m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
-
        if (!m->nodes)
                goto_bad;
 
+        m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets);
+        if (!m->charsets)
+                goto_bad;
+
        _fill_table(m, rx);
        _create_bitsets(m);
        _calc_functions(m);
index 20f8c30e23821e9f2df74f8cfdeb7e0f551f004c..56260f54e224c2d8cfe1ad67e14aecac0390c8a1 100644 (file)
@@ -284,7 +284,7 @@ static struct rx_node *_node(struct dm_pool *mem, int type,
        struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
 
        if (n) {
-               if (!(n->charset = dm_bitset_create(mem, 256))) {
+               if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
                        dm_pool_free(mem, n);
                        return NULL;
                }
index 4a5d8b94d61188f16251bb06557798e9d99cc114..37d7ced8ac414d599d65889519bde620db2adf4d 100644 (file)
@@ -41,6 +41,7 @@ struct rx_node {
        struct rx_node *left, *right;
 
        /* used to build the dfa for the toker */
+        unsigned charset_index;
        int nullable, final;
        dm_bitset_t firstpos;
        dm_bitset_t lastpos;
This page took 0.04207 seconds and 5 git commands to generate.