[PATCH] Fix ^[^#]*\bflistdel regex compilation
Jakub Jelinek
jakub@redhat.com
Sun Dec 21 21:23:00 GMT 2003
Hi!
It seems that too many places in regexec.c and regcomp.c rely on edests having
at least one element for OP_BACK_REF and epsilon nodes.
The following patch should have no runtime impact on regexps that do not use
\b, \B, \< or \>, and even for those that do, the impact should be very low.
Adding dfa->edests[X].nelems > 0 checks all around would probably be more
expensive.
2003-12-21 Jakub Jelinek <jakub@redhat.com>
* posix/regcomp.c (duplicate_node, duplicate_node_closure): Revert
2003-11-24 changes.
* posix/regexec.c (group_nodes_into_DFAstates): For CHARACTER with
NEXT_{,NOT}WORD_CONSTRAINT check word_char bit.
* posix/bug-regex19.c (tests): Add new tests.
--- libc/posix/regcomp.c.jj 2003-12-21 13:15:16.000000000 +0100
+++ libc/posix/regcomp.c 2003-12-21 22:11:17.000000000 +0100
@@ -1336,8 +1336,6 @@ duplicate_node_closure (dfa, top_org_nod
if (BE (err != REG_NOERROR, 0))
return err;
dfa->nexts[clone_node] = dfa->nexts[org_node];
- if (clone_dest == -1)
- break;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
@@ -1375,8 +1373,6 @@ duplicate_node_closure (dfa, top_org_nod
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
- if (clone_dest == -1)
- break;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
@@ -1395,16 +1391,13 @@ duplicate_node_closure (dfa, top_org_nod
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
- if (clone_dest != -1)
- {
- ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- err = duplicate_node_closure (dfa, org_dest, clone_dest,
- root_node, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ err = duplicate_node_closure (dfa, org_dest, clone_dest,
+ root_node, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
}
else
{
@@ -1419,8 +1412,6 @@ duplicate_node_closure (dfa, top_org_nod
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
- if (clone_dest == -1)
- break;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
@@ -1460,21 +1451,7 @@ duplicate_node (new_idx, dfa, org_idx, c
int *new_idx, org_idx;
unsigned int constraint;
{
- int dup_idx;
-
- if (dfa->nodes[org_idx].type == CHARACTER
- && (((constraint & NEXT_WORD_CONSTRAINT)
- && !dfa->nodes[org_idx].word_char)
- || ((constraint & NEXT_NOTWORD_CONSTRAINT)
- && dfa->nodes[org_idx].word_char)))
- {
- /* \<!, \>W etc. can never match. Don't duplicate them, instead
- tell the caller they shouldn't be added to edests. */
- *new_idx = -1;
- return REG_NOERROR;
- }
-
- dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
+ int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
if (BE (dup_idx == -1, 0))
return REG_ESPACE;
dfa->nodes[dup_idx].constraint = constraint;
--- libc/posix/bug-regex19.c.jj 2003-11-24 23:49:52.000000000 +0100
+++ libc/posix/bug-regex19.c 2003-12-21 22:10:56.000000000 +0100
@@ -246,6 +246,9 @@ static struct test_s
{ERE, "(\\<|[A].)[A~C]", "DACC", 0, 1},
{ERE, "(\\<|[A].)[A~C]", "B!A=", 0, 2},
{ERE, "(\\<|[A].)[A~C]", "B~C", 0, 2},
+ {ERE, "^[^A]*\\bB", "==B", 0, 0},
+ {ERE, "^[^A]*\\bB", "CBD!=B", 0, 0},
+ {ERE, "[^A]*\\bB", "==B", 2, 2}
};
int
--- libc/posix/regexec.c.jj 2003-12-21 20:59:10.000000000 +0100
+++ libc/posix/regexec.c 2003-12-21 21:46:47.000000000 +0100
@@ -3416,6 +3416,11 @@ group_nodes_into_DFAstates (preg, state,
if (constraint & NEXT_WORD_CONSTRAINT)
{
unsigned int any_set = 0;
+ if (type == CHARACTER && !node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
for (j = 0; j < BITSET_UINTS; ++j)
@@ -3430,6 +3435,11 @@ group_nodes_into_DFAstates (preg, state,
if (constraint & NEXT_NOTWORD_CONSTRAINT)
{
unsigned int any_set = 0;
+ if (type == CHARACTER && node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
for (j = 0; j < BITSET_UINTS; ++j)
Jakub
More information about the Libc-hacker
mailing list