This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PING][PATCH v3] Improve fnmatch performance.


On Tue, Jun 09, 2015 at 04:27:48PM +0200, OndÅej BÃlka wrote:
> On Wed, May 13, 2015 at 09:48:27AM -0700, Paul Eggert wrote:
> > OndÅej BÃlka wrote:
> > >How to synchronize this with gnulib? Only implementation specific detail
> > >is utf8 detection.
> > 
> > It could be something like this:
> > 
> >  #if _LIBC
> >   struct __locale_data *current = _NL_CURRENT_LOCALE->__locales[LC_COLLATE];
> >   uint_fast32_t encoding =
> >     current->values[_NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE)].word;
> >   bool is_utf8 = encoding == !__cet_other;
> >  #else
> >   bool is_utf8 = STRCASEEQ (locale_charset (),
> >                             "UTF-8", 'U','T','F','-','8',0,0,0,0)
> >  #endif
> > 
> > We should package this sort of thing up and make it easier to use,
> > but that could be another day.
> 
> Yes I will use that pattern, it needs to change details like that it
> also works for single-byte encodings.
> 
> I also removed expect on MB_CUR_MAX as unicode is widespread.
> 
> Also I now return directly match when entire pattern is normal and
> FNM_PERIOD or FNM_FILE_NAME wasn't set which could also help performance
> a bit.
> 
> Then I could allow nonascii characters to start pattern unless its utf8
> and you have FNM_CASEFOLD, would it be better to add two tables or check
> for testing these?
> 
> 	* posix/fnmatch.c (fnmatch): Improve performance.
> 
> diff --git a/posix/fnmatch.c b/posix/fnmatch.c
> index fd85efa..4c32992 100644
> --- a/posix/fnmatch.c
> +++ b/posix/fnmatch.c
> @@ -131,6 +131,13 @@ extern int fnmatch (const char *pattern, const char *string, int flags);
>  #   define ISWCTYPE(WC, WT)	iswctype (WC, WT)
>  #  endif
>  
> +#  ifdef _LIBC
> +#   define STRCASESTR		__strcasestr
> +#  else
> +#   define STRCASESTR		strcasestr
> +#  endif
> +
> +
>  #  if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
>  /* In this case we are implementing the multibyte character handling.  */
>  #   define HANDLE_MULTIBYTE	1
> @@ -332,8 +339,62 @@ fnmatch (pattern, string, flags)
>       const char *string;
>       int flags;
>  {
> +
> +  /* ASCII with \+/.*?[{(@! excluded.  */
> +  static unsigned char normal[256] = {
> + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
> + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 
> + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
> + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
> + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
> + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
> + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
> + };
> +#  if _LIBC
> +   struct __locale_data *current = _NL_CURRENT_LOCALE->__locales[LC_COLLATE];
> +   uint_fast32_t encoding =
> +     current->values[_NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE)].word;
> +   bool fast_encoding = (encoding != __cet_other);
> +#  else
> +#   if HANDLE_MULTIBYTE
> +   bool is_utf8 = STRCASEEQ (locale_charset (),
> +                             "UTF-8", 'U','T','F','-','8',0,0,0,0);
> +   bool fast_encoding = (MB_CUR_MAX == 1) || is_utf;
> +#   else
> +   bool fast_encoding = true;
> +#   endif
> +#  endif
> +
> +  if (fast_encoding)
> +    {
> +      char start[8];
> +      char *string2;
> +      size_t i;
> +      for (i = 0; i < 7 && normal[(unsigned char) pattern[i]]; i++)
> +        start[i] = pattern[i];
> +      start[i] = 0;
> +      if (flags & FNM_CASEFOLD)
> +        string2 = STRCASESTR (string, start);
> +      else  
> +        string2 = strstr (string, start);
> +      if (!string2)
> +        return FNM_NOMATCH;
> + 
> +      if (pattern[i] == '\0' && (flags & (FNM_FILE_NAME | FNM_PERIOD)) == 0)
> +        return 0; 
> +    }
> +
>  # if HANDLE_MULTIBYTE
> -  if (__builtin_expect (MB_CUR_MAX, 1) != 1)
> +  if (MB_CUR_MAX != 1)
>      {
>        mbstate_t ps;
>        size_t n;

-- 

We need a licensed electrician to replace the light bulbs in the computer room.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]