This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
[PATCH] Microoptimize regex, take 2
- From: Bonzini <bonzini at gnu dot org>
- To: libc-alpha at sources dot redhat dot com
- Date: Wed, 24 Mar 2004 06:05:59 -0500
- Subject: [PATCH] Microoptimize regex, take 2
Hi. This is the same series of speedups that I sent earlier in
March, minus the changes that expected mb_cur_max to be 1.
Note that I was not referring to linguistic minorities, but to
the relatively small minority of UTF-8 regexes that can actually
be optimized; for example, regexes with character ranges cannot be
reduced to the mb_cur_max == 1 case. Anyway, no problem: I take
your point, and here is the updated patch you requested.
Paolo
2004-03-24 Paolo Bonzini <bonzini@gnu.org>
* posix/regex_internal.c (free_state): Free the
word_trtable if it is used.
(re_string_reconstruct, re_string_context_at): Add
several branch predictions for mb_cur_max == 1,
case-sensitive matching and no transition table being used.
* posix/regex_internal.h (re_dfastate_t): Turn the
word_trtable from a 1-bit flag into a pointer to a
transition table.
* posix/regexec.c (acquire_init_state_context): Do not
always inline.
* posix/regexec.c (build_trtable): Store the transition
table into state. Return a boolean indicating success.
(transit_state): Check trtable and word_trtable separately,
instead of looking into word_trtable for information about
the trtable's contents. Remove the check for out-of-bounds
buffers.
(check_matching): Check here for out-of-bounds buffers.
(match_ctx_free_subtops): Remove, merge into...
(match_ctx_clean): ... this function.
(match_ctx_free): Call match_ctx_clean.
(re_search_internal): Store into match_kind a set of bits
indicating which incantation of fastmap scanning must be
used. Use a switch statement instead of multiple ifs.
Exit the final "for (;;)" with goto free_return unless
the match succeeded, thus simplifying some conditionals.
diff -u save/regex_internal.h ./regex_internal.h
--- save/regex_internal.h 2004-03-10 12:28:08.000000000 +0100
+++ ./regex_internal.h 2004-03-10 12:28:35.000000000 +0100
@@ -478,6 +478,7 @@
re_node_set nodes;
re_node_set *entrance_nodes;
struct re_dfastate_t **trtable;
+ struct re_dfastate_t **word_trtable;
unsigned int context : 4;
unsigned int halt : 1;
/* If this state can accept `multi byte'.
@@ -487,7 +488,6 @@
/* If this state has backreference node(s). */
unsigned int has_backref : 1;
unsigned int has_constraint : 1;
- unsigned int word_trtable : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
diff -u save/regexec.c ./regexec.c
--- save/regexec.c 2004-03-10 12:28:08.000000000 +0100
+++ ./regexec.c 2004-03-10 12:31:41.000000000 +0100
@@ -22,8 +22,6 @@
int n) internal_function;
static void match_ctx_clean (re_match_context_t *mctx) internal_function;
static void match_ctx_free (re_match_context_t *cache) internal_function;
-static void match_ctx_free_subtops (re_match_context_t *mctx)
- internal_function;
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
int str_idx, int from, int to)
internal_function;
@@ -57,7 +55,7 @@
int nregs, int regs_allocated) internal_function;
static inline re_dfastate_t *acquire_init_state_context
(reg_errcode_t *err, const re_match_context_t *mctx, int idx)
- __attribute ((always_inline)) internal_function;
+ internal_function;
static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
internal_function;
static int check_matching (re_match_context_t *mctx, int fl_longest_match,
@@ -172,8 +170,8 @@
re_node_set *cur_nodes, int cur_str,
int last_str, int subexp_num,
int type) internal_function;
-static re_dfastate_t **build_trtable (re_dfa_t *dfa,
- re_dfastate_t *state) internal_function;
+static int build_trtable (re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
#ifdef RE_ENABLE_I18N
static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
const re_string_t *input, int idx) internal_function;
@@ -603,15 +601,16 @@
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
int left_lim, right_lim, incr;
- int fl_longest_match, match_first, match_last = -1;
- int fast_translate, sb;
+ int fl_longest_match, match_first, match_kind, match_last = -1;
+ int fast_translate, sb, ch;
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
re_match_context_t mctx = { .dfa = dfa };
#else
re_match_context_t mctx;
#endif
- char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
- && range && !preg->can_be_null) ? preg->fastmap : NULL);
+ char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+ && range && !preg->can_be_null) ? preg->fastmap : NULL;
+ unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
memset (&mctx, '\0', sizeof (re_match_context_t));
@@ -684,88 +683,97 @@
left_lim = (range < 0) ? start + range : start;
right_lim = (range < 0) ? start : start + range;
sb = dfa->mb_cur_max == 1;
- fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
+ match_kind =
+ (fastmap ? 8 : 0)
+ | (sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+ | (range >= 0 ? 2 : 0)
+ | (t != NULL ? 1 : 0);
- for (;;)
+ for (;; match_first += incr)
{
- /* At first get the current byte from input string. */
- if (fastmap)
- {
- if (BE (fast_translate, 1))
- {
- unsigned RE_TRANSLATE_TYPE t
- = (unsigned RE_TRANSLATE_TYPE) preg->translate;
- if (BE (range >= 0, 1))
- {
- if (BE (t != NULL, 0))
- {
- while (BE (match_first < right_lim, 1)
- && !fastmap[t[(unsigned char) string[match_first]]])
- ++match_first;
- }
- else
- {
- while (BE (match_first < right_lim, 1)
- && !fastmap[(unsigned char) string[match_first]])
- ++match_first;
- }
- if (BE (match_first == right_lim, 0))
- {
- int ch = match_first >= length
- ? 0 : (unsigned char) string[match_first];
- if (!fastmap[t ? t[ch] : ch])
- break;
- }
- }
- else
- {
- while (match_first >= left_lim)
- {
- int ch = match_first >= length
- ? 0 : (unsigned char) string[match_first];
- if (fastmap[t ? t[ch] : ch])
- break;
- --match_first;
- }
- if (match_first < left_lim)
- break;
- }
+ err = REG_NOMATCH;
+ if (match_first < left_lim || right_lim < match_first)
+ goto free_return;
+
+ /* Advance as rapidly as possible through the string, until we
+ find a plausible place to start matching. This may be done
+ with varying efficiency, so there are various possibilities:
+ only the most common of them are specialized, in order to
+ save on code size. We use a switch statement for speed. */
+ switch (match_kind)
+ {
+ case 0: case 1: case 2: case 3:
+ case 4: case 5: case 6: case 7:
+ /* No fastmap. */
+ break;
+
+ case 15:
+ /* Fastmap with single-byte translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[t[(unsigned char) string[match_first]]])
+ ++match_first;
+ goto forward_match_found_start_or_reached_end;
+
+ case 14:
+ /* Fastmap without translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[(unsigned char) string[match_first]])
+ ++match_first;
+
+ forward_match_found_start_or_reached_end:
+ if (BE (match_first == right_lim, 0))
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (!fastmap[t ? t[ch] : ch])
+ goto free_return;
}
- else
- {
- int ch;
+ break;
- do
+ case 12:
+ case 13:
+ /* Fastmap without multi-byte translation, match backwards. */
+ while (match_first >= left_lim)
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (fastmap[t ? t[ch] : ch])
+ break;
+ --match_first;
+ }
+ if (match_first < left_lim)
+ goto free_return;
+ break;
+
+ default:
+ /* In this case, we can't determine easily the current byte,
+ since it might be a component byte of a multibyte
+ character. Then we use the constructed buffer instead. */
+ do
+ {
+ /* If MATCH_FIRST is out of the valid range, reconstruct the
+ buffers. */
+ if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len <= match_first
+ || match_first < mctx.input.raw_mbs_idx)
{
- /* In this case, we can't determine easily the current byte,
- since it might be a component byte of a multibyte
- character. Then we use the constructed buffer
- instead. */
- /* If MATCH_FIRST is out of the valid range, reconstruct the
- buffers. */
- if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len
- <= match_first
- || match_first < mctx.input.raw_mbs_idx)
- {
- err = re_string_reconstruct (&mctx.input, match_first,
- eflags);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
- Note that MATCH_FIRST must not be smaller than 0. */
- ch = ((match_first >= length) ? 0
- : re_string_byte_at (&mctx.input,
- match_first
- - mctx.input.raw_mbs_idx));
- if (fastmap[ch])
- break;
- match_first += incr;
+ err = re_string_reconstruct (&mctx.input, match_first,
+ eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
}
- while (match_first >= left_lim && match_first <= right_lim);
- if (! fastmap[ch])
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = ((match_first >= length) ? 0
+ : re_string_byte_at (&mctx.input,
+ match_first - mctx.input.raw_mbs_idx));
+ if (fastmap[ch])
break;
+ match_first += incr;
}
+ while (match_first >= left_lim && match_first <= right_lim);
+ if (!fastmap[ch])
+ goto free_return;
+ break;
}
/* Reconstruct the buffers so that the matcher can assume that
@@ -773,57 +781,60 @@
err = re_string_reconstruct (&mctx.input, match_first, eflags);
if (BE (err != REG_NOERROR, 0))
goto free_return;
+
#ifdef RE_ENABLE_I18N
- /* Eliminate it when it is a component of a multibyte character
- and isn't the head of a multibyte character. */
- if (sb || re_string_first_byte (&mctx.input, 0))
+ /* Don't consider this char as a possible match start if it part,
+ yet isn't the head, of a multibyte character. */
+ if (!sb && !re_string_first_byte (&mctx.input, 0))
+ continue;
#endif
+
+ /* It seems to be appropriate one, then use the matcher. */
+ /* We assume that the matching starts from 0. */
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+ match_last = check_matching (&mctx, fl_longest_match,
+ range >= 0 ? &match_first : NULL);
+ if (match_last != -1)
{
- /* It seems to be appropriate one, then use the matcher. */
- /* We assume that the matching starts from 0. */
- mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
- match_last = check_matching (&mctx, fl_longest_match,
- range >= 0 ? &match_first : NULL);
- if (match_last != -1)
+ if (BE (match_last == -2, 0))
{
- if (BE (match_last == -2, 0))
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ else
+ {
+ mctx.match_last = match_last;
+ if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
{
- err = REG_ESPACE;
- goto free_return;
+ re_dfastate_t *pstate = mctx.state_log[match_last];
+ mctx.last_node = check_halt_state_context (&mctx, pstate,
+ match_last);
}
- else
+ if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+ || dfa->nbackref)
{
- mctx.match_last = match_last;
- if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
- {
- re_dfastate_t *pstate = mctx.state_log[match_last];
- mctx.last_node = check_halt_state_context (&mctx, pstate,
- match_last);
- }
- if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
- || dfa->nbackref)
- {
- err = prune_impossible_nodes (&mctx);
- if (err == REG_NOERROR)
- break;
- if (BE (err != REG_NOMATCH, 0))
- goto free_return;
- match_last = -1;
- }
- else
- break; /* We found a match. */
+ err = prune_impossible_nodes (&mctx);
+ if (err == REG_NOERROR)
+ break;
+ if (BE (err != REG_NOMATCH, 0))
+ goto free_return;
+ match_last = -1;
}
+ else
+ break; /* We found a match. */
}
- match_ctx_clean (&mctx);
}
- /* Update counter. */
- match_first += incr;
- if (match_first < left_lim || right_lim < match_first)
- break;
+
+ match_ctx_clean (&mctx);
}
+#ifdef DEBUG
+ assert (match_last != -1);
+ assert (err == REG_NOERROR);
+#endif
+
/* Set pmatch[] if we need. */
- if (match_last != -1 && nmatch > 0)
+ if (nmatch > 0)
{
int reg_idx;
@@ -868,7 +879,7 @@
pmatch[reg_idx].rm_eo += match_first;
}
}
- err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
+
free_return:
re_free (mctx.state_log);
if (dfa->nbackref)
@@ -1073,6 +1084,20 @@
while (!re_string_eoi (&mctx->input))
{
re_dfastate_t *old_state = cur_state;
+ int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+ if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ assert (err == REG_ESPACE);
+ return -2;
+ }
+ }
+
cur_state = transit_state (&err, mctx, cur_state);
if (mctx->state_log != NULL)
cur_state = merge_state_with_log (&err, mctx, cur_state);
@@ -1091,10 +1116,10 @@
break;
}
- if (at_init_state)
+ if (BE (at_init_state, 0))
{
if (old_state == cur_state)
- next_start_idx = re_string_cur_idx (&mctx->input);
+ next_start_idx = next_char_idx;
else
at_init_state = 0;
}
@@ -1110,13 +1135,16 @@
/* We found an appropriate halt state. */
match_last = re_string_cur_idx (&mctx->input);
match = 1;
+
+ /* We found a match, do not modify match_first below. */
+ p_match_first = NULL;
if (!fl_longest_match)
break;
}
}
- }
+ }
- if (match_last == -1 && p_match_first)
+ if (p_match_first)
*p_match_first += next_start_idx;
return match_last;
@@ -2168,22 +2196,12 @@
re_match_context_t *mctx;
re_dfastate_t *state;
{
- re_dfa_t *const dfa = mctx->dfa;
re_dfastate_t **trtable;
unsigned char ch;
- if (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.bufs_len
- || (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.valid_len
- && mctx->input.valid_len < mctx->input.len))
- {
- *err = extend_buffers (mctx);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- }
-
#ifdef RE_ENABLE_I18N
/* If the current state can accept multibyte. */
- if (state->accept_mb)
+ if (BE (state->accept_mb, 0))
{
*err = transit_state_mb (mctx, state);
if (BE (*err != REG_NOERROR, 0))
@@ -2194,32 +2212,34 @@
/* Then decide the next state with the single byte. */
if (1)
{
- /* Use transition table */
+ /* Use transition table. Sorry for the goto, but we really need
+ to squeeze every single instruction here. */
ch = re_string_fetch_byte (&mctx->input);
+
+ retry:
trtable = state->trtable;
- if (trtable == NULL)
+ if (BE (trtable != NULL, 1))
+ return trtable[ch];
+
+ trtable = state->word_trtable;
+ if (BE (trtable != NULL, 1))
{
- trtable = build_trtable (dfa, state);
- if (trtable == NULL)
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- if (BE (state->word_trtable, 0))
+ unsigned int context;
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ if (IS_WORD_CONTEXT (context))
+ return trtable[ch + SBC_MAX];
+ else
+ return trtable[ch];
+ }
+
+ if (!build_trtable (mctx->dfa, state))
{
- unsigned int context;
- context
- = re_string_context_at (&mctx->input,
- re_string_cur_idx (&mctx->input) - 1,
- mctx->eflags);
- if (IS_WORD_CONTEXT (context))
- return trtable[ch + SBC_MAX];
- else
- return trtable[ch];
- }
- else
- return trtable[ch];
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ goto retry;
}
#if 0
else
@@ -3228,7 +3248,7 @@
/* Build transition table for the state.
Return the new table if succeeded, otherwise return NULL. */
-static re_dfastate_t **
+static int
build_trtable (dfa, state)
re_dfa_t *dfa;
re_dfastate_t *state;
@@ -3238,6 +3258,7 @@
unsigned int elem, mask;
int dests_node_malloced = 0, dest_states_malloced = 0;
int ndests; /* Number of the destination states from `state'. */
+ int need_word_trtable = 0;
re_dfastate_t **trtable;
re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
re_node_set follows, *dests_node;
@@ -3258,14 +3279,11 @@
dests_node = (re_node_set *)
malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
if (BE (dests_node == NULL, 0))
- return NULL;
+ return 0;
dests_node_malloced = 1;
}
dests_ch = (bitset *) (dests_node + SBC_MAX);
- /* Initialize transiton table. */
- state->word_trtable = 0;
-
/* At first, group all nodes belonging to `state' into several
destinations. */
ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
@@ -3273,14 +3291,12 @@
{
if (dests_node_malloced)
free (dests_node);
- /* Return NULL in case of an error, trtable otherwise. */
if (ndests == 0)
- {
- state->trtable = (re_dfastate_t **)
- calloc (sizeof (re_dfastate_t *), SBC_MAX);;
- return state->trtable;
- }
- return NULL;
+ state->trtable = (re_dfastate_t **)
+ calloc (sizeof (re_dfastate_t *), SBC_MAX);;
+
+ /* Return 0 in case of an error, 1 otherwise. */
+ return state->trtable != NULL;
}
err = re_node_set_alloc (&follows, ndests + 1);
@@ -3307,7 +3323,7 @@
re_node_set_free (dests_node + i);
if (dests_node_malloced)
free (dests_node);
- return NULL;
+ return 0;
}
dest_states_malloced = 1;
}
@@ -3343,9 +3359,11 @@
if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
goto out_free;
+#ifdef RE_ENABLE_I18N
if (dest_states[i] != dest_states_word[i]
&& dfa->mb_cur_max > 1)
- state->word_trtable = 1;
+ need_word_trtable = 1;
+#endif
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_NEWLINE);
@@ -3360,7 +3378,7 @@
bitset_merge (acceptable, dests_ch[i]);
}
- if (!BE (state->word_trtable, 0))
+ if (!BE (need_word_trtable, 0))
{
/* We don't care about whether the following character is a word
character, or we are in a single-byte character set so we can
@@ -3389,6 +3407,7 @@
trtable[ch] = dest_states[j];
}
}
+#ifdef RE_ENABLE_I18N
else
{
/* We care about whether the following character is a word
@@ -3418,6 +3437,7 @@
trtable[ch + SBC_MAX] = dest_states_word[j];
}
}
+#endif
/* new line */
if (bitset_contain (acceptable, NEWLINE_CHAR))
@@ -3428,7 +3448,7 @@
{
/* k-th destination accepts newline character. */
trtable[NEWLINE_CHAR] = dest_states_nl[j];
- if (state->word_trtable)
+ if (need_word_trtable)
trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
/* There must be only one destination which accepts
newline. See group_nodes_into_DFAstates. */
@@ -3446,8 +3466,12 @@
if (dests_node_malloced)
free (dests_node);
- state->trtable = trtable;
- return trtable;
+ if (need_word_trtable)
+ state->word_trtable = trtable;
+ else
+ state->trtable = trtable;
+
+ return 1;
}
/* Group all nodes belonging to STATE into several destinations.
@@ -4079,28 +4103,6 @@
match_ctx_clean (mctx)
re_match_context_t *mctx;
{
- match_ctx_free_subtops (mctx);
- mctx->nsub_tops = 0;
- mctx->nbkref_ents = 0;
-}
-
-/* Free all the memory associated with MCTX. */
-
-static void
-match_ctx_free (mctx)
- re_match_context_t *mctx;
-{
- match_ctx_free_subtops (mctx);
- re_free (mctx->sub_tops);
- re_free (mctx->bkref_ents);
-}
-
-/* Free all the memory associated with MCTX->SUB_TOPS. */
-
-static void
-match_ctx_free_subtops (mctx)
- re_match_context_t *mctx;
-{
int st_idx;
for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
{
@@ -4120,8 +4122,24 @@
}
free (top);
}
+
+ mctx->nsub_tops = 0;
+ mctx->nbkref_ents = 0;
}
+/* Free all the memory associated with MCTX. */
+
+static void
+match_ctx_free (mctx)
+ re_match_context_t *mctx;
+{
+ /* First, free all the memory associated with MCTX->SUB_TOPS. */
+ match_ctx_clean (mctx);
+ re_free (mctx->sub_tops);
+ re_free (mctx->bkref_ents);
+}
+
+
/* Add a new backreference entry to MCTX.
Note that we assume that caller never call this function with duplicate
entry, and call with STR_IDX which isn't smaller than any existing entry.
diff -u save/regex_internal.c ./regex_internal.c
--- save/regex_internal.c 2004-03-24 12:04:14.000000000 +0100
+++ ./regex_internal.c 2004-03-24 12:05:05.000000000 +0100
@@ -581,7 +581,7 @@
int idx, eflags;
{
int offset = idx - pstr->raw_mbs_idx;
- if (offset < 0)
+ if (BE (offset < 0, 0))
{
/* Reset buffer. */
#ifdef RE_ENABLE_I18N
@@ -601,10 +601,10 @@
offset = idx;
}
- if (offset != 0)
+ if (BE (offset != 0, 1))
{
/* Are the characters which are already checked remain? */
- if (offset < pstr->valid_raw_len
+ if (BE (offset < pstr->valid_raw_len, 1)
#ifdef RE_ENABLE_I18N
/* Handling this would enlarge the code too much.
Accept a slowdown in that case. */
@@ -619,7 +619,7 @@
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
#endif /* RE_ENABLE_I18N */
- if (pstr->mbs_allocated)
+ if (BE (pstr->mbs_allocated, 0))
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
pstr->valid_len -= offset;
@@ -717,7 +717,7 @@
? CONTEXT_NEWLINE : 0));
}
}
- if (!pstr->mbs_allocated)
+ if (!BE (pstr->mbs_allocated, 0))
pstr->mbs += offset;
}
pstr->raw_mbs_idx = idx;
@@ -739,16 +739,17 @@
}
else
#endif /* RE_ENABLE_I18N */
+ if (BE (pstr->mbs_allocated, 0))
{
if (pstr->icase)
build_upper_buffer (pstr);
else if (pstr->trans != NULL)
re_string_translate_buffer (pstr);
- else
- pstr->valid_len = pstr->len;
}
- pstr->cur_idx = 0;
+ else
+ pstr->valid_len = pstr->len;
+ pstr->cur_idx = 0;
return REG_NOERROR;
}
@@ -846,16 +847,13 @@
int idx, eflags;
{
int c;
- if (idx < 0 || idx == input->len)
- {
- if (idx < 0)
- /* In this case, we use the value stored in input->tip_context,
- since we can't know the character in input->mbs[-1] here. */
- return input->tip_context;
- else /* (idx == input->len) */
- return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
- : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
- }
+ if (BE (idx < 0, 0))
+ /* In this case, we use the value stored in input->tip_context,
+ since we can't know the character in input->mbs[-1] here. */
+ return input->tip_context;
+ if (BE (idx == input->len, 0))
+ return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+ : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1)
{
@@ -1650,5 +1648,6 @@
}
re_node_set_free (&state->nodes);
re_free (state->trtable);
+ re_free (state->word_trtable);
re_free (state);
}