The more complex patch ================================================ 2002-10-22 Paolo Bonzini (bonzini@gnu.org) * posix/regex_internal.h (re_backref_cache_entry): Add dst_node and subexp_len fields for more aggressive caching. * posix/regexec.c (sift_states_bkref): Bail out of the innermost loop as soon as possible. Use the new fields in the backreference cache. (match_ctx_add_entry): Cache more aggressively. Callers adjusted. diff -prU3 save/regex_internal.h ./regex_internal.h --- save/regex_internal.h Tue Oct 22 22:57:38 2002 +++ ./regex_internal.h Tue Oct 22 22:58:38 2002 @@ -400,10 +400,11 @@ struct re_state_table_entry struct re_backref_cache_entry { - int node; + int node, dst_node; int str_idx; int subexp_from; int subexp_to; + int subexp_len; int flag; }; diff -prU3 save/regexec.c ./regexec.c --- save/regexec.c Tue Oct 22 22:35:32 2002 +++ ./regexec.c Tue Oct 22 23:05:04 2002 @@ -46,8 +46,8 @@ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, re_string_t *input, int n); static void match_ctx_free (re_match_context_t *cache); -static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, - int str_idx, int from, int to); +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, re_dfa_t *dfa, + int node, int str_idx, int from, int to); static void match_ctx_clear_flag (re_match_context_t *mctx); static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, re_dfastate_t **limited_sts, int last_node, @@ -855,7 +855,7 @@ check_matching (preg, mctx, fl_search, f if (clexp_node->type == OP_CLOSE_SUBEXP && clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx) { - err = match_ctx_add_entry (mctx, node, 0, 0, 0); + err = match_ctx_add_entry (mctx, dfa, node, 0, 0, 0); if (BE (err != REG_NOERROR, 0)) return -2; break; @@ -1861,7 +1861,7 @@ search_subexp (preg, mctx, sctx, str_idx } /* Successfully matched, add a new cache entry.
*/ dest_str_idx = bkref_str_idx + subexp_len; - err = match_ctx_add_entry (mctx, sctx->cur_bkref, bkref_str_idx, + err = match_ctx_add_entry (mctx, dfa, sctx->cur_bkref, bkref_str_idx, str_idx, sctx->cls_subexp_idx); if (BE (err != REG_NOERROR, 0)) return err; @@ -1913,12 +1913,13 @@ sift_states_bkref (preg, mctx, sctx, str int cur_bkref_idx = re_string_cur_idx (mctx->input); re_token_type_t type; node = candidates->elems[node_idx]; - type = dfa->nodes[node].type; if (node == sctx->cur_bkref && str_idx == cur_bkref_idx) continue; /* Avoid infinite loop for the REs like "()\1+". */ if (node == sctx->last_node && str_idx == sctx->last_str_idx) continue; + + type = dfa->nodes[node].type; if (type == OP_BACK_REF) { int enabled_idx; @@ -1927,15 +1928,16 @@ sift_states_bkref (preg, mctx, sctx, str int disabled_idx, subexp_len, to_idx, dst_node; struct re_backref_cache_entry *entry; entry = mctx->bkref_ents + enabled_idx; - subexp_len = entry->subexp_to - entry->subexp_from; - to_idx = str_idx + subexp_len; - dst_node = (subexp_len ? 
dfa->nexts[node] - : dfa->edests[node].elems[0]); + if (entry->node != node || entry->str_idx != str_idx) + continue; - if (entry->node != node || entry->str_idx != str_idx - || to_idx > sctx->last_str_idx + subexp_len = entry->subexp_len; + to_idx = str_idx + subexp_len; + if (to_idx > sctx->last_str_idx || sctx->sifted_states[to_idx] == NULL) continue; + + dst_node = entry->dst_node; if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)) continue; @@ -1949,7 +1951,7 @@ sift_states_bkref (preg, mctx, sctx, str if (strncmp (buf + entry->subexp_from, buf + cur_bkref_idx, subexp_len) != 0) continue; - err = match_ctx_add_entry (mctx, sctx->cur_bkref, + err = match_ctx_add_entry (mctx, dfa, sctx->cur_bkref, cur_bkref_idx, entry->subexp_from, entry->subexp_to); if (BE (err != REG_NOERROR, 0)) @@ -2399,7 +2401,7 @@ transit_state_bkref_loop (preg, nodes, w bkref_ent = mctx->bkref_ents + bkc_idx; if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) continue; - subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; + subexp_len = bkref_ent->subexp_len; new_dest_nodes = (subexp_len == 0 ? dfa->eclosures + dfa->edests[node_idx].elems[0] : dfa->eclosures + dfa->nexts[node_idx]); @@ -3120,8 +3122,9 @@ match_ctx_free (mctx) /* Add a new backreference entry to the cache. */ static reg_errcode_t -match_ctx_add_entry (mctx, node, str_idx, from, to) +match_ctx_add_entry (mctx, dfa, node, str_idx, from, to) re_match_context_t *mctx; + re_dfa_t *dfa; int node, str_idx, from, to; { if (mctx->nbkref_ents >= mctx->abkref_ents) @@ -3139,6 +3142,10 @@ match_ctx_add_entry (mctx, node, str_idx mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + mctx->bkref_ents[mctx->nbkref_ents].subexp_len = to - from; + mctx->bkref_ents[mctx->nbkref_ents].dst_node = to == from + ? 
dfa->edests[node].elems[0] : dfa->nexts[node]; + mctx->bkref_ents[mctx->nbkref_ents++].flag = 0; if (mctx->max_mb_elem_len < to - from) mctx->max_mb_elem_len = to - from;