This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: suggested patches for regex routines from gawk

From: OndÅej BÃlka <neleai at seznam dot cz>
To: Aharon Robbins <arnold at skeeve dot com>
Cc: libc-alpha at sourceware dot org
Date: Fri, 28 Dec 2012 12:04:24 +0100
Subject: Re: suggested patches for regex routines from gawk
References: <201212281009.qBSA9psa003299@skeeve.com>

On Fri, Dec 28, 2012 at 12:09:51PM +0200, Aharon Robbins wrote:
> Hello All.
> 
> The other category is simple portability fixes. This falls into a few
> subcategories - removing a few gratutious GCC-isms, adding missing #ifdefs
> (or rearranging code to use them properly), and adding code for non-GCC
> compilers in one or two instances, including variable declarations at
> the top of blocks for C 90 compilers. A few unused variables are also
> just deleted.
> 
Could you split it into two patches?
These bits look obvious for me. I am not familiar enough with reg* code to
judge rest.
> -------------------------------
> diff --git a/posix/regcomp.c b/posix/regcomp.c
> index e85b235..482c41b 100644
> --- a/posix/regcomp.c
> +++ b/posix/regcomp.c
> @@ -301,7 +301,7 @@ static void
>  re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
>  			 char *fastmap)
>  {
> -  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
> +  volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
Why volatile here? 
>    int node_cnt;
>    int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
>    for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
> @@ -315,7 +315,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
>  #ifdef RE_ENABLE_I18N
>  	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
>  	    {
> -	      unsigned char *buf = alloca (dfa->mb_cur_max), *p;
> +	      unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p;
I would use malloca which I split to separate thread.
>  	      wchar_t wc;
>  	      mbstate_t state;
>  
> @@ -331,6 +331,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
>  		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
>  		      != (size_t) -1))
>  		re_set_fastmap (fastmap, 0, buf[0]);
> +	      re_free (buf);
>  	    }
>  #endif
>  	}
snip
> @@ -925,9 +943,8 @@ static void
>  internal_function
>  init_word_char (re_dfa_t *dfa)
>  {
> +  int i, j, ch;
>    dfa->word_ops_used = 1;
> -  int i = 0;
> -  int ch = 0;
>    if (BE (dfa->map_notascii == 0, 1))
>      {
>        if (sizeof (dfa->word_char[0]) == 8)
> @@ -959,8 +976,8 @@ init_word_char (re_dfa_t *dfa)
>  	}
>      }
>  
> -  for (; i < BITSET_WORDS; ++i)
> -    for (int j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
> +  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
> +    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
>        if (isalnum (ch) || ch == '_')
>  	dfa->word_char[i] |= (bitset_word_t) 1 << j;
>  }
snip
> @@ -3446,6 +3459,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
>  	return REG_ECOLLATE;
>  
>        /* Build single byte matcing table for this equivalence class.  */
> +      char_buf[1] = (unsigned char) '\0';
>        len = weights[idx1 & 0xffffff];
>        for (ch = 0; ch < SBC_MAX; ++ch)
>  	{
> diff --git a/posix/regex_internal.c b/posix/regex_internal.c
> index 9be8a53..c3960b0 100644
> --- a/posix/regex_internal.c
> +++ b/posix/regex_internal.c
> @@ -518,7 +518,7 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
>        /* Then proceed the next character.  */
>        rawbuf_idx += mbclen;
>      }
> -  *last_wc = wc;
> +  *last_wc = (wint_t) wc;
>    return rawbuf_idx;
>  }
>  #endif /* RE_ENABLE_I18N  */
> @@ -686,10 +686,10 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
>  	}
>        else
>  	{
> +#ifdef RE_ENABLE_I18N
>  	  /* No, skip all characters until IDX.  */
>  	  int prev_valid_len = pstr->valid_len;
>  
> -#ifdef RE_ENABLE_I18N
>  	  if (BE (pstr->offsets_needed, 0))
>  	    {
>  	      pstr->len = pstr->raw_len - idx + offset;
> @@ -1408,7 +1418,6 @@ static int
>  internal_function
>  re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
>  {
> -  int type = token.type;
>    if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
>      {
>        size_t new_nodes_alloc = dfa->nodes_alloc * 2;
> @@ -1444,7 +1453,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
>    dfa->nodes[dfa->nodes_len].constraint = 0;
>  #ifdef RE_ENABLE_I18N
>    dfa->nodes[dfa->nodes_len].accept_mb =
> -    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
> +    (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET;
>  #endif
>    dfa->nexts[dfa->nodes_len] = -1;
>    re_node_set_init_empty (dfa->edests + dfa->nodes_len);
> diff --git a/posix/regex_internal.h b/posix/regex_internal.h
> index 6dfdef6..54bbd1e 100644
> --- a/posix/regex_internal.h
> +++ b/posix/regex_internal.h
> @@ -98,6 +98,9 @@
>  # define BE(expr, val) __builtin_expect (expr, val)
>  #else
>  # define BE(expr, val) (expr)
> +# ifdef inline
> +# undef inline
> +# endif
>  # define inline
>  #endif
>  
> @@ -112,7 +115,13 @@
>  
>  /* Rename to standard API for using out of glibc.  */
>  #ifndef _LIBC
> +# ifdef __wctype
> +# undef __wctype
> +# endif
>  # define __wctype wctype
> +# ifdef __iswctype
> +# undef __iswctype
> +# endif
>  # define __iswctype iswctype
>  # define __btowc btowc
>  # define __mbrtowc mbrtowc
snip
> @@ -3915,6 +3923,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
>  	  if (cset->nequiv_classes)
>  	    {
>  	      const unsigned char *cp = pin;
> +          int32_t idx;
>  	      table = (const int32_t *)
>  		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
>  	      weights = (const unsigned char *)
> @@ -3923,7 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
>  		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
>  	      indirect = (const int32_t *)
>  		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
> -	      int32_t idx = findidx (&cp, elem_len);
> +	      idx = findidx (&cp, elem_len);
>  	      if (idx > 0)
>  		for (i = 0; i < cset->nequiv_classes; ++i)
>  		  {
snip

-- 

The cables are not the same length.

References:
- suggested patches for regex routines from gawk
  - From: Aharon Robbins

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]