This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Microoptimize regex, take 2


Hi.  This is the same series of speedups that I sent earlier in
March, without the changes for expecting mb_cur_max to be 1.

Note that I was not referring to linguistic minorities, but to
the relative minority of UTF-8 regexes that can actually be optimized;
for example, regexes with character ranges cannot be reduced to
the mb_cur_max == 1 case.  Anyway, no problem: I take your point, and
here is the updated patch you requested.

Paolo

2004-03-24  Paolo Bonzini  <bonzini@gnu.org>

	* posix/regex_internal.c (free_state): Free the
	word_trtable if it is used.
	(re_string_reconstruct, re_string_context_at): Add
	several branch predictions for mb_cur_max == 1,
	case-sensitive matching and no translation table being used.
	* posix/regex_internal.h (re_dfastate_t): Turn the
	word_trtable from a 1-bit flag into a pointer to a
	transition table.
	* posix/regexec.c (acquire_init_state_context): Do not
	always inline.
	* posix/regexec.c (build_trtable): Store the transition
	table into state.  Return a boolean indicating success.
	(transit_state): Check trtable and word_trtable separately,
	instead of looking into word_trtable for information about
	the trtable's contents.  Remove the check for out-of-bounds
	buffers.
	(check_matching): Check here for out-of-bounds buffers.
	(match_ctx_free_subtops): Remove, merge into...
	(match_ctx_clean): ... this function.
	(match_ctx_free): Call match_ctx_clean.
	(re_search_internal): Store into match_kind a set of bits
	indicating which incantation of fastmap scanning must be
	used.  Use a switch statement instead of multiple ifs.
	Exit the final "for (;;)" with goto free_return unless
	the match succeeded, thus simplifying some conditionals.


diff -u save/regex_internal.h ./regex_internal.h
--- save/regex_internal.h	2004-03-10 12:28:08.000000000 +0100
+++ ./regex_internal.h	2004-03-10 12:28:35.000000000 +0100
@@ -478,6 +478,7 @@
   re_node_set nodes;
   re_node_set *entrance_nodes;
   struct re_dfastate_t **trtable;
+  struct re_dfastate_t **word_trtable;
   unsigned int context : 4;
   unsigned int halt : 1;
   /* If this state can accept `multi byte'.
@@ -487,7 +488,6 @@
   /* If this state has backreference node(s).  */
   unsigned int has_backref : 1;
   unsigned int has_constraint : 1;
-  unsigned int word_trtable : 1;
 };
 typedef struct re_dfastate_t re_dfastate_t;
 
diff -u save/regexec.c ./regexec.c
--- save/regexec.c	2004-03-10 12:28:08.000000000 +0100
+++ ./regexec.c	2004-03-10 12:31:41.000000000 +0100
@@ -22,8 +22,6 @@
 				     int n) internal_function;
 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
 static void match_ctx_free (re_match_context_t *cache) internal_function;
-static void match_ctx_free_subtops (re_match_context_t *mctx)
-     internal_function;
 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
 					  int str_idx, int from, int to)
      internal_function;
@@ -57,7 +55,7 @@
 			      int nregs, int regs_allocated) internal_function;
 static inline re_dfastate_t *acquire_init_state_context
      (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
-     __attribute ((always_inline)) internal_function;
+     internal_function;
 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
      internal_function;
 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
@@ -172,8 +170,8 @@
 					 re_node_set *cur_nodes, int cur_str,
 					 int last_str, int subexp_num,
 					 int type) internal_function;
-static re_dfastate_t **build_trtable (re_dfa_t *dfa,
-				      re_dfastate_t *state) internal_function;
+static int build_trtable (re_dfa_t *dfa,
+			  re_dfastate_t *state) internal_function;
 #ifdef RE_ENABLE_I18N
 static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
 				    const re_string_t *input, int idx) internal_function;
@@ -603,15 +601,16 @@
   reg_errcode_t err;
   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
   int left_lim, right_lim, incr;
-  int fl_longest_match, match_first, match_last = -1;
-  int fast_translate, sb;
+  int fl_longest_match, match_first, match_kind, match_last = -1;
+  int fast_translate, sb, ch;
 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
   re_match_context_t mctx = { .dfa = dfa };
 #else
   re_match_context_t mctx;
 #endif
-  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
-		    && range && !preg->can_be_null) ? preg->fastmap : NULL);
+  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
+  unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
 
 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
   memset (&mctx, '\0', sizeof (re_match_context_t));
@@ -684,88 +683,97 @@
   left_lim = (range < 0) ? start + range : start;
   right_lim = (range < 0) ? start : start + range;
   sb = dfa->mb_cur_max == 1;
-  fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
+  match_kind = 
+    (fastmap ? 8 : 0)
+    | (sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+    | (range >= 0 ? 2 : 0)
+    | (t != NULL ? 1 : 0);
 
-  for (;;)
+  for (;; match_first += incr)
     {
-      /* At first get the current byte from input string.  */
-      if (fastmap)
-	{
-	  if (BE (fast_translate, 1))
-	    {
-	      unsigned RE_TRANSLATE_TYPE t
-		= (unsigned RE_TRANSLATE_TYPE) preg->translate;
-	      if (BE (range >= 0, 1))
-		{
-		  if (BE (t != NULL, 0))
-		    {
-		      while (BE (match_first < right_lim, 1)
-			     && !fastmap[t[(unsigned char) string[match_first]]])
-			++match_first;
-		    }
-		  else
-		    {
-		      while (BE (match_first < right_lim, 1)
-			     && !fastmap[(unsigned char) string[match_first]])
-			++match_first;
-		    }
-		  if (BE (match_first == right_lim, 0))
-		    {
-		      int ch = match_first >= length
-			       ? 0 : (unsigned char) string[match_first];
-		      if (!fastmap[t ? t[ch] : ch])
-			break;
-		    }
-		}
-	      else
-		{
-		  while (match_first >= left_lim)
-		    {
-		      int ch = match_first >= length
-			       ? 0 : (unsigned char) string[match_first];
-		      if (fastmap[t ? t[ch] : ch])
-			break;
-		      --match_first;
-		    }
-		  if (match_first < left_lim)
-		    break;
-		}
+      err = REG_NOMATCH;
+      if (match_first < left_lim || right_lim < match_first)
+	goto free_return;
+
+      /* Advance as rapidly as possible through the string, until we
+	 find a plausible place to start matching.  This may be done
+	 with varying efficiency, so there are various possibilities:
+	 only the most common of them are specialized, in order to
+	 save on code size.  We use a switch statement for speed.  */
+      switch (match_kind)
+	{
+	case 0: case 1: case 2: case 3:
+	case 4: case 5: case 6: case 7:
+	  /* No fastmap.  */
+	  break;
+
+	case 15:
+	  /* Fastmap with single-byte translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[t[(unsigned char) string[match_first]]])
+	    ++match_first;
+	  goto forward_match_found_start_or_reached_end;
+
+	case 14:
+	  /* Fastmap without translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[(unsigned char) string[match_first]])
+	    ++match_first;
+
+	forward_match_found_start_or_reached_end:
+	  if (BE (match_first == right_lim, 0))
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (!fastmap[t ? t[ch] : ch])
+		goto free_return;
 	    }
-	  else
-	    {
-	      int ch;
+	  break;
 
-	      do
+	case 12:
+	case 13:
+	  /* Fastmap without multi-byte translation, match backwards.  */
+	  while (match_first >= left_lim)
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (fastmap[t ? t[ch] : ch])
+		break;
+	      --match_first;
+	    }
+	  if (match_first < left_lim)
+	    goto free_return;
+	  break;
+	  
+	default:
+	  /* In this case, we can't determine easily the current byte,
+	     since it might be a component byte of a multibyte
+	     character.  Then we use the constructed buffer instead.  */
+	  do
+	    {
+	      /* If MATCH_FIRST is out of the valid range, reconstruct the
+		 buffers.  */
+	      if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len <= match_first
+		  || match_first < mctx.input.raw_mbs_idx)
 		{
-		  /* In this case, we can't determine easily the current byte,
-		     since it might be a component byte of a multibyte
-		     character.  Then we use the constructed buffer
-		     instead.  */
-		  /* If MATCH_FIRST is out of the valid range, reconstruct the
-		     buffers.  */
-		  if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len
-		      <= match_first
-		      || match_first < mctx.input.raw_mbs_idx)
-		    {
-		      err = re_string_reconstruct (&mctx.input, match_first,
-						   eflags);
-		      if (BE (err != REG_NOERROR, 0))
-			goto free_return;
-		    }
-		  /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
-		     Note that MATCH_FIRST must not be smaller than 0.  */
-		  ch = ((match_first >= length) ? 0
-		       : re_string_byte_at (&mctx.input,
-					    match_first
-					    - mctx.input.raw_mbs_idx));
-		  if (fastmap[ch])
-		    break;
-		  match_first += incr;
+		  err = re_string_reconstruct (&mctx.input, match_first,
+					       eflags);
+		  if (BE (err != REG_NOERROR, 0))
+		    goto free_return;
 		}
-	      while (match_first >= left_lim && match_first <= right_lim);
-	      if (! fastmap[ch])
+	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+		 Note that MATCH_FIRST must not be smaller than 0.  */
+	      ch = ((match_first >= length) ? 0
+		    : re_string_byte_at (&mctx.input,
+					 match_first - mctx.input.raw_mbs_idx));
+	      if (fastmap[ch])
 		break;
+	      match_first += incr;
 	    }
+	  while (match_first >= left_lim && match_first <= right_lim);
+	  if (!fastmap[ch])
+	    goto free_return;
+	  break;
 	}
 
       /* Reconstruct the buffers so that the matcher can assume that
@@ -773,57 +781,60 @@
       err = re_string_reconstruct (&mctx.input, match_first, eflags);
       if (BE (err != REG_NOERROR, 0))
 	goto free_return;
+
 #ifdef RE_ENABLE_I18N
-     /* Eliminate it when it is a component of a multibyte character
-	 and isn't the head of a multibyte character.  */
-      if (sb || re_string_first_byte (&mctx.input, 0))
+     /* Don't consider this char as a possible match start if it is part,
+	yet isn't the head, of a multibyte character.  */
+      if (!sb && !re_string_first_byte (&mctx.input, 0))
+	continue;
 #endif
+
+      /* It seems to be appropriate one, then use the matcher.  */
+      /* We assume that the matching starts from 0.  */
+      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+      match_last = check_matching (&mctx, fl_longest_match,
+				   range >= 0 ? &match_first : NULL);
+      if (match_last != -1)
 	{
-	  /* It seems to be appropriate one, then use the matcher.  */
-	  /* We assume that the matching starts from 0.  */
-	  mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
-	  match_last = check_matching (&mctx, fl_longest_match,
-				       range >= 0 ? &match_first : NULL);
-	  if (match_last != -1)
+	  if (BE (match_last == -2, 0))
 	    {
-	      if (BE (match_last == -2, 0))
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  else
+	    {
+	      mctx.match_last = match_last;
+	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 		{
-		  err = REG_ESPACE;
-		  goto free_return;
+		  re_dfastate_t *pstate = mctx.state_log[match_last];
+		  mctx.last_node = check_halt_state_context (&mctx, pstate,
+							     match_last);
 		}
-	      else
+	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+		  || dfa->nbackref)
 		{
-		  mctx.match_last = match_last;
-		  if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
-		    {
-		      re_dfastate_t *pstate = mctx.state_log[match_last];
-		      mctx.last_node = check_halt_state_context (&mctx, pstate,
-								 match_last);
-		    }
-		  if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
-		      || dfa->nbackref)
-		    {
-		      err = prune_impossible_nodes (&mctx);
-		      if (err == REG_NOERROR)
-			break;
-		      if (BE (err != REG_NOMATCH, 0))
-			goto free_return;
-		      match_last = -1;
-		    }
-		  else
-		    break; /* We found a match.  */
+		  err = prune_impossible_nodes (&mctx);
+		  if (err == REG_NOERROR)
+		    break;
+		  if (BE (err != REG_NOMATCH, 0))
+		    goto free_return;
+		  match_last = -1;
 		}
+	      else
+		break; /* We found a match.  */
 	    }
-	  match_ctx_clean (&mctx);
 	}
-      /* Update counter.  */
-      match_first += incr;
-      if (match_first < left_lim || right_lim < match_first)
-	break;
+
+      match_ctx_clean (&mctx);
     }
 
+#ifdef DEBUG
+  assert (match_last != -1);
+  assert (err == REG_NOERROR);
+#endif
+
   /* Set pmatch[] if we need.  */
-  if (match_last != -1 && nmatch > 0)
+  if (nmatch > 0)
     {
       int reg_idx;
 
@@ -868,7 +879,7 @@
 	    pmatch[reg_idx].rm_eo += match_first;
 	  }
     }
-  err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
+
  free_return:
   re_free (mctx.state_log);
   if (dfa->nbackref)
@@ -1073,6 +1084,20 @@
   while (!re_string_eoi (&mctx->input))
     {
       re_dfastate_t *old_state = cur_state;
+      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+          || (BE (next_char_idx >= mctx->input.valid_len, 0)
+              && mctx->input.valid_len < mctx->input.len))
+        {
+          err = extend_buffers (mctx);
+          if (BE (err != REG_NOERROR, 0))
+	    {
+	      assert (err == REG_ESPACE);
+	      return -2;
+	    }
+        }
+
       cur_state = transit_state (&err, mctx, cur_state);
       if (mctx->state_log != NULL)
 	cur_state = merge_state_with_log (&err, mctx, cur_state);
@@ -1091,10 +1116,10 @@
 	    break;
 	}
 
-      if (at_init_state)
+      if (BE (at_init_state, 0))
 	{
 	  if (old_state == cur_state)
-	    next_start_idx = re_string_cur_idx (&mctx->input);
+	    next_start_idx = next_char_idx;
 	  else
 	    at_init_state = 0;
 	}
@@ -1110,13 +1135,16 @@
 	      /* We found an appropriate halt state.  */
 	      match_last = re_string_cur_idx (&mctx->input);
 	      match = 1;
+
+	      /* We found a match, do not modify match_first below.  */
+	      p_match_first = NULL;
 	      if (!fl_longest_match)
 		break;
 	    }
 	}
-   }
+    }
 
-  if (match_last == -1 && p_match_first)
+  if (p_match_first)
     *p_match_first += next_start_idx;
 
   return match_last;
@@ -2168,22 +2196,12 @@
      re_match_context_t *mctx;
      re_dfastate_t *state;
 {
-  re_dfa_t *const dfa = mctx->dfa;
   re_dfastate_t **trtable;
   unsigned char ch;
 
-  if (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.bufs_len
-      || (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.valid_len
-	  && mctx->input.valid_len < mctx->input.len))
-    {
-      *err = extend_buffers (mctx);
-      if (BE (*err != REG_NOERROR, 0))
-	return NULL;
-    }
-
 #ifdef RE_ENABLE_I18N
       /* If the current state can accept multibyte.  */
-      if (state->accept_mb)
+      if (BE (state->accept_mb, 0))
 	{
 	  *err = transit_state_mb (mctx, state);
 	  if (BE (*err != REG_NOERROR, 0))
@@ -2194,32 +2212,34 @@
   /* Then decide the next state with the single byte.  */
   if (1)
     {
-      /* Use transition table  */
+      /* Use transition table.  Sorry for the goto, but we really need
+         to squeeze every single instruction here.  */
       ch = re_string_fetch_byte (&mctx->input);
+ 
+    retry:
       trtable = state->trtable;
-      if (trtable == NULL)
+      if (BE (trtable != NULL, 1))
+        return trtable[ch];
+
+      trtable = state->word_trtable;
+      if (BE (trtable != NULL, 1))
         {
-          trtable = build_trtable (dfa, state);
-          if (trtable == NULL)
-	    {
-	      *err = REG_ESPACE;
-	      return NULL;
-	    }
-	}
-      if (BE (state->word_trtable, 0))
+          unsigned int context;
+          context = re_string_context_at (&mctx->input,
+                                          re_string_cur_idx (&mctx->input) - 1,
+                                          mctx->eflags);
+          if (IS_WORD_CONTEXT (context))
+            return trtable[ch + SBC_MAX];
+          else
+            return trtable[ch];
+        }
+
+      if (!build_trtable (mctx->dfa, state))
         {
-	  unsigned int context;
-	  context
-	    = re_string_context_at (&mctx->input,
-				    re_string_cur_idx (&mctx->input) - 1,
-				    mctx->eflags);
-	  if (IS_WORD_CONTEXT (context))
-	    return trtable[ch + SBC_MAX];
-	  else
-	    return trtable[ch];
-	}
-      else
-	return trtable[ch];
+          *err = REG_ESPACE;
+          return NULL;
+        }
+      goto retry;
     }
 #if 0
   else
@@ -3228,7 +3248,7 @@
 /* Build transition table for the state.
    Return the new table if succeeded, otherwise return NULL.  */
 
-static re_dfastate_t **
+static int
 build_trtable (dfa, state)
     re_dfa_t *dfa;
     re_dfastate_t *state;
@@ -3238,6 +3258,7 @@
   unsigned int elem, mask;
   int dests_node_malloced = 0, dest_states_malloced = 0;
   int ndests; /* Number of the destination states from `state'.  */
+  int need_word_trtable = 0;
   re_dfastate_t **trtable;
   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
   re_node_set follows, *dests_node;
@@ -3258,14 +3279,11 @@
       dests_node = (re_node_set *)
 		   malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
       if (BE (dests_node == NULL, 0))
-	return NULL;
+	return 0;
       dests_node_malloced = 1;
     }
   dests_ch = (bitset *) (dests_node + SBC_MAX);
 
-  /* Initialize transiton table.  */
-  state->word_trtable = 0;
-
   /* At first, group all nodes belonging to `state' into several
      destinations.  */
   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
@@ -3273,14 +3291,12 @@
     {
       if (dests_node_malloced)
 	free (dests_node);
-      /* Return NULL in case of an error, trtable otherwise.  */
       if (ndests == 0)
-	{
-	  state->trtable = (re_dfastate_t **)
-	    calloc (sizeof (re_dfastate_t *), SBC_MAX);;
-	  return state->trtable;
-	}
-      return NULL;
+	state->trtable = (re_dfastate_t **)
+	  calloc (sizeof (re_dfastate_t *), SBC_MAX);;
+
+      /* Return 0 in case of an error, 1 otherwise.  */
+      return state->trtable != NULL;
     }
 
   err = re_node_set_alloc (&follows, ndests + 1);
@@ -3307,7 +3323,7 @@
 	    re_node_set_free (dests_node + i);
 	  if (dests_node_malloced)
 	    free (dests_node);
-	  return NULL;
+	  return 0;
 	}
       dest_states_malloced = 1;
     }
@@ -3343,9 +3359,11 @@
 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
 	    goto out_free;
 
+#ifdef RE_ENABLE_I18N
 	  if (dest_states[i] != dest_states_word[i]
 	      && dfa->mb_cur_max > 1)
-	    state->word_trtable = 1;
+	    need_word_trtable = 1;
+#endif
 
 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
 							CONTEXT_NEWLINE);
@@ -3360,7 +3378,7 @@
       bitset_merge (acceptable, dests_ch[i]);
     }
 
-  if (!BE (state->word_trtable, 0))
+  if (!BE (need_word_trtable, 0))
     {
       /* We don't care about whether the following character is a word
 	 character, or we are in a single-byte character set so we can
@@ -3389,6 +3407,7 @@
 		trtable[ch] = dest_states[j];
 	    }
     }
+#ifdef RE_ENABLE_I18N
   else
     {
       /* We care about whether the following character is a word
@@ -3418,6 +3437,7 @@
 	      trtable[ch + SBC_MAX] = dest_states_word[j];
 	    }
     }
+#endif
 
   /* new line */
   if (bitset_contain (acceptable, NEWLINE_CHAR))
@@ -3428,7 +3448,7 @@
 	  {
 	    /* k-th destination accepts newline character.  */
 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
-	    if (state->word_trtable)
+	    if (need_word_trtable)
 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
 	    /* There must be only one destination which accepts
 	       newline.  See group_nodes_into_DFAstates.  */
@@ -3446,8 +3466,12 @@
   if (dests_node_malloced)
     free (dests_node);
 
-  state->trtable = trtable;
-  return trtable;
+  if (need_word_trtable)
+    state->word_trtable = trtable;
+  else
+    state->trtable = trtable;
+
+  return 1;
 }
 
 /* Group all nodes belonging to STATE into several destinations.
@@ -4079,28 +4103,6 @@
 match_ctx_clean (mctx)
     re_match_context_t *mctx;
 {
-  match_ctx_free_subtops (mctx);
-  mctx->nsub_tops = 0;
-  mctx->nbkref_ents = 0;
-}
-
-/* Free all the memory associated with MCTX.  */
-
-static void
-match_ctx_free (mctx)
-    re_match_context_t *mctx;
-{
-  match_ctx_free_subtops (mctx);
-  re_free (mctx->sub_tops);
-  re_free (mctx->bkref_ents);
-}
-
-/* Free all the memory associated with MCTX->SUB_TOPS.  */
-
-static void
-match_ctx_free_subtops (mctx)
-     re_match_context_t *mctx;
-{
   int st_idx;
   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
     {
@@ -4120,8 +4122,24 @@
 	}
       free (top);
     }
+
+  mctx->nsub_tops = 0;
+  mctx->nbkref_ents = 0;
 }
 
+/* Free all the memory associated with MCTX.  */
+
+static void
+match_ctx_free (mctx)
+    re_match_context_t *mctx;
+{
+  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->sub_tops);
+  re_free (mctx->bkref_ents);
+}
+
+
 /* Add a new backreference entry to MCTX.
    Note that we assume that caller never call this function with duplicate
    entry, and call with STR_IDX which isn't smaller than any existing entry.
diff -u save/regex_internal.c ./regex_internal.c
--- save/regex_internal.c	2004-03-24 12:04:14.000000000 +0100
+++ ./regex_internal.c	2004-03-24 12:05:05.000000000 +0100
@@ -581,7 +581,7 @@
      int idx, eflags;
 {
   int offset = idx - pstr->raw_mbs_idx;
-  if (offset < 0)
+  if (BE (offset < 0, 0))
     {
       /* Reset buffer.  */
 #ifdef RE_ENABLE_I18N
@@ -601,10 +601,10 @@
       offset = idx;
     }
 
-  if (offset != 0)
+  if (BE (offset != 0, 1))
     {
       /* Are the characters which are already checked remain?  */
-      if (offset < pstr->valid_raw_len
+      if (BE (offset < pstr->valid_raw_len, 1)
 #ifdef RE_ENABLE_I18N
 	  /* Handling this would enlarge the code too much.
 	     Accept a slowdown in that case.  */
@@ -619,7 +619,7 @@
 	    memmove (pstr->wcs, pstr->wcs + offset,
 		     (pstr->valid_len - offset) * sizeof (wint_t));
 #endif /* RE_ENABLE_I18N */
-	  if (pstr->mbs_allocated)
+	  if (BE (pstr->mbs_allocated, 0))
 	    memmove (pstr->mbs, pstr->mbs + offset,
 		     pstr->valid_len - offset);
 	  pstr->valid_len -= offset;
@@ -717,7 +717,7 @@
 				      ? CONTEXT_NEWLINE : 0));
 	    }
 	}
-      if (!pstr->mbs_allocated)
+      if (!BE (pstr->mbs_allocated, 0))
 	pstr->mbs += offset;
     }
   pstr->raw_mbs_idx = idx;
@@ -739,16 +739,17 @@
     }
   else
 #endif /* RE_ENABLE_I18N */
+  if (BE (pstr->mbs_allocated, 0))
     {
       if (pstr->icase)
 	build_upper_buffer (pstr);
       else if (pstr->trans != NULL)
 	re_string_translate_buffer (pstr);
-      else
-	pstr->valid_len = pstr->len;
     }
-  pstr->cur_idx = 0;
+  else
+    pstr->valid_len = pstr->len;
 
+  pstr->cur_idx = 0;
   return REG_NOERROR;
 }
 
@@ -846,16 +847,13 @@
      int idx, eflags;
 {
   int c;
-  if (idx < 0 || idx == input->len)
-    {
-      if (idx < 0)
-	/* In this case, we use the value stored in input->tip_context,
-	   since we can't know the character in input->mbs[-1] here.  */
-	return input->tip_context;
-      else /* (idx == input->len) */
-	return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
-		: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
-    }
+  if (BE (idx < 0, 0))
+    /* In this case, we use the value stored in input->tip_context,
+       since we can't know the character in input->mbs[-1] here.  */
+    return input->tip_context;
+  if (BE (idx == input->len, 0))
+    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
 #ifdef RE_ENABLE_I18N
   if (input->mb_cur_max > 1)
     {
@@ -1650,5 +1648,6 @@
     }
   re_node_set_free (&state->nodes);
   re_free (state->trtable);
+  re_free (state->word_trtable);
   re_free (state);
 }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]