This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v3] Improve strtok(_r) performance


LGTM.

On 13/12/2016 11:20, Wilco Dijkstra wrote:
> Version 3 fixes a minor style issue in bench-strtok.c.
> 
> Improve strtok and strtok_r performance.  Instead of calling strpbrk which
> calls strcspn, call strcspn directly so we get the end of the token without
> an extra call to rawmemchr.  Also avoid an unnecessary call to strcspn after
> the last token by adding an early exit for an empty string.  Change strtok
> to tailcall strtok_r to avoid unnecessary code duplication.
> 
> Remove the special header optimization for strtok_r of a 1-character
> constant string - both strspn and strcspn contain optimizations for this
> case.  Benchmarking this showed similar performance in the worst case,
> but up to 5.5x better performance in the "found" case for large inputs.
> 
> Passes regression tests, OK for commit?
> 
> ChangeLog:
> 2015-12-13  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* benchtests/bench-strtok.c (oldstrtok): Add old implementation.
> 	* string/strtok.c (strtok): Change to tailcall __strtok_r.
> 	* string/strtok_r.c (__strtok_r): Optimize for performance.
> 	* string/string-inlines.c (__old_strtok_r_1c): New function.
> 	* string/bits/string2.h (__strtok_r): Move to string-inlines.c
> --
> diff --git a/benchtests/bench-strtok.c b/benchtests/bench-strtok.c
> index eeb798f01575c712b958ad1b7d5f88f91e158202..41e0e45db818428a3f3a940fa1a70ca3850e84b3 100644
> --- a/benchtests/bench-strtok.c
> +++ b/benchtests/bench-strtok.c
> @@ -20,13 +20,41 @@
>  #define TEST_NAME "strtok"
>  #include "bench-string.h"
>  
> -#define STRTOK strtok_string
> -#include <string/strtok.c>
> +char *
> +oldstrtok (char *s, const char *delim)
> +{
> +  static char *olds;
> +  char *token;
> +
> +  if (s == NULL)
> +    s = olds;
> +
> +  /* Scan leading delimiters.  */
> +  s += strspn (s, delim);
> +  if (*s == '\0')
> +    {
> +      olds = s;
> +      return NULL;
> +    }
>  
> +  /* Find the end of the token.  */
> +  token = s;
> +  s = strpbrk (token, delim);
> +  if (s == NULL)
> +    /* This token finishes the string.  */
> +    olds = __rawmemchr (token, '\0');
> +  else
> +    {
> +      /* Terminate the token and make OLDS point past it.  */
> +      *s = '\0';
> +      olds = s + 1;
> +    }
> +  return token;
> +}
>  
>  typedef char *(*proto_t) (const char *, const char *);
>  
> -IMPL (strtok_string, 0)
> +IMPL (oldstrtok, 0)
>  IMPL (strtok, 1)
>  
>  static void
> diff --git a/string/bits/string2.h b/string/bits/string2.h
> index ca1eda9bd127aadb01ab5c929d16621357ddc6e6..e39d4f1a85c25a4f47418e6a5613b27177ca6cbb 100644
> --- a/string/bits/string2.h
> +++ b/string/bits/string2.h
> @@ -180,45 +180,6 @@ extern void *__rawmemchr (const void *__s, int __c);
>  #endif
>  
>  
> -#if !defined _HAVE_STRING_ARCH_strtok_r || defined _FORCE_INLINES
> -# ifndef _HAVE_STRING_ARCH_strtok_r
> -#  define __strtok_r(s, sep, nextp) \
> -  (__extension__ (__builtin_constant_p (sep) && __string2_1bptr_p (sep)	      \
> -		  && ((const char *) (sep))[0] != '\0'			      \
> -		  && ((const char *) (sep))[1] == '\0'			      \
> -		  ? __strtok_r_1c (s, ((const char *) (sep))[0], nextp)       \
> -		  : __strtok_r (s, sep, nextp)))
> -# endif
> -
> -__STRING_INLINE char *__strtok_r_1c (char *__s, char __sep, char **__nextp);
> -__STRING_INLINE char *
> -__strtok_r_1c (char *__s, char __sep, char **__nextp)
> -{
> -  char *__result;
> -  if (__s == NULL)
> -    __s = *__nextp;
> -  while (*__s == __sep)
> -    ++__s;
> -  __result = NULL;
> -  if (*__s != '\0')
> -    {
> -      __result = __s++;
> -      while (*__s != '\0')
> -	if (*__s++ == __sep)
> -	  {
> -	    __s[-1] = '\0';
> -	    break;
> -	  }
> -    }
> -  *__nextp = __s;
> -  return __result;
> -}
> -# ifdef __USE_POSIX
> -#  define strtok_r(s, sep, nextp) __strtok_r (s, sep, nextp)
> -# endif
> -#endif
> -
> -
>  #if !defined _HAVE_STRING_ARCH_strsep || defined _FORCE_INLINES
>  # ifndef _HAVE_STRING_ARCH_strsep
>  
> diff --git a/string/string-inlines.c b/string/string-inlines.c
> index 1091468519e1561ac2a4e9c3ed6eb75ee9fdf43f..d43e5897c37430e5f97940469d65e7ddbdcbd09c 100644
> --- a/string/string-inlines.c
> +++ b/string/string-inlines.c
> @@ -35,6 +35,36 @@
>  
>  #include "shlib-compat.h"
>  
> +#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_25)
> +/* The inline functions are not used from GLIBC 2.25 and forward, however
> +   they are required to provide the symbols through string-inlines.c
> +   (if inlining is not possible for compatibility reasons).  */
> +
> +char *
> +__old_strtok_r_1c (char *__s, char __sep, char **__nextp)
> +{
> +  char *__result;
> +  if (__s == NULL)
> +    __s = *__nextp;
> +  while (*__s == __sep)
> +    ++__s;
> +  __result = NULL;
> +  if (*__s != '\0')
> +    {
> +      __result = __s++;
> +      while (*__s != '\0')
> +	if (*__s++ == __sep)
> +	  {
> +	    __s[-1] = '\0';
> +	    break;
> +	  }
> +    }
> +  *__nextp = __s;
> +  return __result;
> +}
> +compat_symbol (libc, __old_strtok_r_1c, __strtok_r_1c, GLIBC_2_1_1);
> +#endif
> +
>  #if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
>  /* The inline functions are not used from GLIBC 2.24 and forward, however
>     they are required to provide the symbols through string-inlines.c
> diff --git a/string/strtok.c b/string/strtok.c
> index 7a4574db5c80501e47d045ad4347e8a287b32191..482cdc1da45a71173080b6eff3857e863b5977ea 100644
> --- a/string/strtok.c
> +++ b/string/strtok.c
> @@ -18,14 +18,6 @@
>  #include <string.h>
>  
>  
> -static char *olds;
> -
> -#undef strtok
> -
> -#ifndef STRTOK
> -# define STRTOK strtok
> -#endif
> -
>  /* Parse S into tokens separated by characters in DELIM.
>     If S is NULL, the last string strtok() was called with is
>     used.  For example:
> @@ -36,32 +28,8 @@ static char *olds;
>  		// s = "abc\0=-def\0"
>  */
>  char *
> -STRTOK (char *s, const char *delim)
> +strtok (char *s, const char *delim)
>  {
> -  char *token;
> -
> -  if (s == NULL)
> -    s = olds;
> -
> -  /* Scan leading delimiters.  */
> -  s += strspn (s, delim);
> -  if (*s == '\0')
> -    {
> -      olds = s;
> -      return NULL;
> -    }
> -
> -  /* Find the end of the token.  */
> -  token = s;
> -  s = strpbrk (token, delim);
> -  if (s == NULL)
> -    /* This token finishes the string.  */
> -    olds = __rawmemchr (token, '\0');
> -  else
> -    {
> -      /* Terminate the token and make OLDS point past it.  */
> -      *s = '\0';
> -      olds = s + 1;
> -    }
> -  return token;
> +  static char *olds;
> +  return __strtok_r (s, delim, &olds);
>  }
> diff --git a/string/strtok_r.c b/string/strtok_r.c
> index f351304766108dad2c1cff881ad3bebae821b2a0..2d251f90d79b6c546e80e1d25c03955ea8dad92b 100644
> --- a/string/strtok_r.c
> +++ b/string/strtok_r.c
> @@ -22,14 +22,10 @@
>  
>  #include <string.h>
>  
> -#undef strtok_r
> -#undef __strtok_r
> -
>  #ifndef _LIBC
>  /* Get specification.  */
>  # include "strtok_r.h"
>  # define __strtok_r strtok_r
> -# define __rawmemchr strchr
>  #endif
>  
>  /* Parse S into tokens separated by characters in DELIM.
> @@ -45,11 +41,17 @@
>  char *
>  __strtok_r (char *s, const char *delim, char **save_ptr)
>  {
> -  char *token;
> +  char *end;
>  
>    if (s == NULL)
>      s = *save_ptr;
>  
> +  if (*s == '\0')
> +    {
> +      *save_ptr = s;
> +      return NULL;
> +    }
> +
>    /* Scan leading delimiters.  */
>    s += strspn (s, delim);
>    if (*s == '\0')
> @@ -59,18 +61,17 @@ __strtok_r (char *s, const char *delim, char **save_ptr)
>      }
>  
>    /* Find the end of the token.  */
> -  token = s;
> -  s = strpbrk (token, delim);
> -  if (s == NULL)
> -    /* This token finishes the string.  */
> -    *save_ptr = __rawmemchr (token, '\0');
> -  else
> +  end = s + strcspn (s, delim);
> +  if (*end == '\0')
>      {
> -      /* Terminate the token and make *SAVE_PTR point past it.  */
> -      *s = '\0';
> -      *save_ptr = s + 1;
> +      *save_ptr = end;
> +      return s;
>      }
> -  return token;
> +
> +  /* Terminate the token and make *SAVE_PTR point past it.  */
> +  *end = '\0';
> +  *save_ptr = end + 1;
> +  return s;
>  }
>  #ifdef weak_alias
>  libc_hidden_def (__strtok_r)
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]