This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: suggested patches for regex routines from gawk
- From: OndÅej BÃlka <neleai at seznam dot cz>
- To: Aharon Robbins <arnold at skeeve dot com>
- Cc: libc-alpha at sourceware dot org
- Date: Fri, 28 Dec 2012 12:04:24 +0100
- Subject: Re: suggested patches for regex routines from gawk
- References: <201212281009.qBSA9psa003299@skeeve.com>
On Fri, Dec 28, 2012 at 12:09:51PM +0200, Aharon Robbins wrote:
> Hello All.
>
> The other category is simple portability fixes. This falls into a few
> subcategories - removing a few gratutious GCC-isms, adding missing #ifdefs
> (or rearranging code to use them properly), and adding code for non-GCC
> compilers in one or two instances, including variable declarations at
> the top of blocks for C 90 compilers. A few unused variables are also
> just deleted.
>
Could you split it into two patches?
These bits look obvious for me. I am not familiar enough with reg* code to
judge rest.
> -------------------------------
> diff --git a/posix/regcomp.c b/posix/regcomp.c
> index e85b235..482c41b 100644
> --- a/posix/regcomp.c
> +++ b/posix/regcomp.c
> @@ -301,7 +301,7 @@ static void
> re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
> char *fastmap)
> {
> - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
> + volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
Why volatile here?
> int node_cnt;
> int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
> for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
> @@ -315,7 +315,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
> #ifdef RE_ENABLE_I18N
> if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
> {
> - unsigned char *buf = alloca (dfa->mb_cur_max), *p;
> + unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p;
I would use malloca which I split to separate thread.
> wchar_t wc;
> mbstate_t state;
>
> @@ -331,6 +331,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
> && (__wcrtomb ((char *) buf, towlower (wc), &state)
> != (size_t) -1))
> re_set_fastmap (fastmap, 0, buf[0]);
> + re_free (buf);
> }
> #endif
> }
snip
> @@ -925,9 +943,8 @@ static void
> internal_function
> init_word_char (re_dfa_t *dfa)
> {
> + int i, j, ch;
> dfa->word_ops_used = 1;
> - int i = 0;
> - int ch = 0;
> if (BE (dfa->map_notascii == 0, 1))
> {
> if (sizeof (dfa->word_char[0]) == 8)
> @@ -959,8 +976,8 @@ init_word_char (re_dfa_t *dfa)
> }
> }
>
> - for (; i < BITSET_WORDS; ++i)
> - for (int j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
> + for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
> + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
> if (isalnum (ch) || ch == '_')
> dfa->word_char[i] |= (bitset_word_t) 1 << j;
> }
snip
> @@ -3446,6 +3459,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
> return REG_ECOLLATE;
>
> /* Build single byte matcing table for this equivalence class. */
> + char_buf[1] = (unsigned char) '\0';
> len = weights[idx1 & 0xffffff];
> for (ch = 0; ch < SBC_MAX; ++ch)
> {
> diff --git a/posix/regex_internal.c b/posix/regex_internal.c
> index 9be8a53..c3960b0 100644
> --- a/posix/regex_internal.c
> +++ b/posix/regex_internal.c
> @@ -518,7 +518,7 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
> /* Then proceed the next character. */
> rawbuf_idx += mbclen;
> }
> - *last_wc = wc;
> + *last_wc = (wint_t) wc;
> return rawbuf_idx;
> }
> #endif /* RE_ENABLE_I18N */
> @@ -686,10 +686,10 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
> }
> else
> {
> +#ifdef RE_ENABLE_I18N
> /* No, skip all characters until IDX. */
> int prev_valid_len = pstr->valid_len;
>
> -#ifdef RE_ENABLE_I18N
> if (BE (pstr->offsets_needed, 0))
> {
> pstr->len = pstr->raw_len - idx + offset;
> @@ -1408,7 +1418,6 @@ static int
> internal_function
> re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
> {
> - int type = token.type;
> if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
> {
> size_t new_nodes_alloc = dfa->nodes_alloc * 2;
> @@ -1444,7 +1453,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
> dfa->nodes[dfa->nodes_len].constraint = 0;
> #ifdef RE_ENABLE_I18N
> dfa->nodes[dfa->nodes_len].accept_mb =
> - (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
> + (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET;
> #endif
> dfa->nexts[dfa->nodes_len] = -1;
> re_node_set_init_empty (dfa->edests + dfa->nodes_len);
> diff --git a/posix/regex_internal.h b/posix/regex_internal.h
> index 6dfdef6..54bbd1e 100644
> --- a/posix/regex_internal.h
> +++ b/posix/regex_internal.h
> @@ -98,6 +98,9 @@
> # define BE(expr, val) __builtin_expect (expr, val)
> #else
> # define BE(expr, val) (expr)
> +# ifdef inline
> +# undef inline
> +# endif
> # define inline
> #endif
>
> @@ -112,7 +115,13 @@
>
> /* Rename to standard API for using out of glibc. */
> #ifndef _LIBC
> +# ifdef __wctype
> +# undef __wctype
> +# endif
> # define __wctype wctype
> +# ifdef __iswctype
> +# undef __iswctype
> +# endif
> # define __iswctype iswctype
> # define __btowc btowc
> # define __mbrtowc mbrtowc
snip
> @@ -3915,6 +3923,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
> if (cset->nequiv_classes)
> {
> const unsigned char *cp = pin;
> + int32_t idx;
> table = (const int32_t *)
> _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
> weights = (const unsigned char *)
> @@ -3923,7 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
> _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
> indirect = (const int32_t *)
> _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
> - int32_t idx = findidx (&cp, elem_len);
> + idx = findidx (&cp, elem_len);
> if (idx > 0)
> for (i = 0; i < cset->nequiv_classes; ++i)
> {
snip
--
The cables are not the same length.