[PATCH] Fix ^[^#]*\bflistdel regex compilation

Jakub Jelinek jakub@redhat.com
Sun Dec 21 21:23:00 GMT 2003


Hi!

It seems that too many places in regexec.c and regcomp.c rely on edests
having at least one element for OP_BACK_REF and epsilon nodes.
The following patch should have no runtime impact on regexps that do not
use \b, \B, \< or \>, and even for those which use them the impact should
be very low.
Adding dfa->edests[X].nelems > 0 checks all around would probably be more
expensive.

2003-12-21  Jakub Jelinek  <jakub@redhat.com>

	* posix/regcomp.c (duplicate_node, duplicate_node_closure): Revert
	2003-11-24 changes.
	* posix/regexec.c (group_nodes_into_DFAstates): For CHARACTER with
	NEXT_{,NOT}WORD_CONSTRAINT check word_char bit.
	* posix/bug-regex19.c (tests): Add new tests.

--- libc/posix/regcomp.c.jj	2003-12-21 13:15:16.000000000 +0100
+++ libc/posix/regcomp.c	2003-12-21 22:11:17.000000000 +0100
@@ -1336,8 +1336,6 @@ duplicate_node_closure (dfa, top_org_nod
 	  if (BE (err != REG_NOERROR, 0))
 	    return err;
 	  dfa->nexts[clone_node] = dfa->nexts[org_node];
-	  if (clone_dest == -1)
-	    break;
 	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
 	  if (BE (ret < 0, 0))
 	    return REG_ESPACE;
@@ -1375,8 +1373,6 @@ duplicate_node_closure (dfa, top_org_nod
 	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
 	  if (BE (err != REG_NOERROR, 0))
 	    return err;
-	  if (clone_dest == -1)
-	    break;
 	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
 	  if (BE (ret < 0, 0))
 	    return REG_ESPACE;
@@ -1395,16 +1391,13 @@ duplicate_node_closure (dfa, top_org_nod
 	      err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
 	      if (BE (err != REG_NOERROR, 0))
 		return err;
-	      if (clone_dest != -1)
-		{
-		  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
-		  if (BE (ret < 0, 0))
-		    return REG_ESPACE;
-		  err = duplicate_node_closure (dfa, org_dest, clone_dest,
-						root_node, constraint);
-		  if (BE (err != REG_NOERROR, 0))
-		    return err;
-		}
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
+					    root_node, constraint);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
 	    }
 	  else
 	    {
@@ -1419,8 +1412,6 @@ duplicate_node_closure (dfa, top_org_nod
 	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
 	  if (BE (err != REG_NOERROR, 0))
 	    return err;
-	  if (clone_dest == -1)
-	    break;
 	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
 	  if (BE (ret < 0, 0))
 	    return REG_ESPACE;
@@ -1460,21 +1451,7 @@ duplicate_node (new_idx, dfa, org_idx, c
      int *new_idx, org_idx;
      unsigned int constraint;
 {
-  int dup_idx;
-
-  if (dfa->nodes[org_idx].type == CHARACTER
-      && (((constraint & NEXT_WORD_CONSTRAINT)
-	   && !dfa->nodes[org_idx].word_char)
-	  || ((constraint & NEXT_NOTWORD_CONSTRAINT)
-	      && dfa->nodes[org_idx].word_char)))
-    {
-      /* \<!, \>W etc. can never match.  Don't duplicate them, instead
-	 tell the caller they shouldn't be added to edests.  */
-      *new_idx = -1;
-      return REG_NOERROR;
-    }
-
-  dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
+  int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
   if (BE (dup_idx == -1, 0))
     return REG_ESPACE;
   dfa->nodes[dup_idx].constraint = constraint;
--- libc/posix/bug-regex19.c.jj	2003-11-24 23:49:52.000000000 +0100
+++ libc/posix/bug-regex19.c	2003-12-21 22:10:56.000000000 +0100
@@ -246,6 +246,9 @@ static struct test_s
   {ERE, "(\\<|[A].)[A~C]", "DACC", 0, 1},
   {ERE, "(\\<|[A].)[A~C]", "B!A=", 0, 2},
   {ERE, "(\\<|[A].)[A~C]", "B~C", 0, 2},
+  {ERE, "^[^A]*\\bB", "==B", 0, 0},
+  {ERE, "^[^A]*\\bB", "CBD!=B", 0, 0},
+  {ERE, "[^A]*\\bB", "==B", 2, 2}
 };
 
 int
--- libc/posix/regexec.c.jj	2003-12-21 20:59:10.000000000 +0100
+++ libc/posix/regexec.c	2003-12-21 21:46:47.000000000 +0100
@@ -3416,6 +3416,11 @@ group_nodes_into_DFAstates (preg, state,
 	  if (constraint & NEXT_WORD_CONSTRAINT)
 	    {
 	      unsigned int any_set = 0;
+	      if (type == CHARACTER && !node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
 #ifdef RE_ENABLE_I18N
 	      if (dfa->mb_cur_max > 1)
 		for (j = 0; j < BITSET_UINTS; ++j)
@@ -3430,6 +3435,11 @@ group_nodes_into_DFAstates (preg, state,
 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
 	    {
 	      unsigned int any_set = 0;
+	      if (type == CHARACTER && node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
 #ifdef RE_ENABLE_I18N
 	      if (dfa->mb_cur_max > 1)
 		for (j = 0; j < BITSET_UINTS; ++j)

	Jakub



More information about the Libc-hacker mailing list