This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v2] Improve strstr performance



On 30/06/2018 11:30, Wilco Dijkstra wrote:
> Version 2 uses __strnlen to avoid namespace issues.
> 
> Improve strstr performance.  Strstr tends to be slow because it uses
> many calls to memchr and a slow byte loop to scan for the next match.
> Performance is significantly improved by using strnlen on larger blocks
> and using strchr to search for the next matching character.  strcasestr
> can also use strnlen to scan ahead, and memmem can use memchr to check
> for the next match.
> 
> On the GLIBC bench tests the performance gains on Cortex-A72 are:
> strstr: +25%
> strcasestr: +4.3%
> memmem: +18%
> 
> On a 256KB dataset strstr performance improves by 67%, strcasestr by 47%.
> 
> ChangeLog:
> 2018-06-30  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* benchtests/bench-strcasestr.c: Rename __strnlen to strnlen.
> 	* benchtests/bench-strstr.c: Likewise.
> 	* string/memmem.c (FASTSEARCH): Define.	
> 	* string/str-two-way.h (two_way_short_needle): Minor cleanups.
> 	Add support for FASTSEARCH.
> 	* string/strcasestr.c (AVAILABLE): Use read-ahead __strnlen.
> 	* string/strstr.c (AVAILABLE): Use read-ahead __strnlen.
> 	(FASTSEARCH): Define.
> 	* string/test-strcasestr.c: Rename __strnlen to strnlen.
> 	* string/test-strstr.c: Likewise.

LGTM thanks.

> --
> diff --git a/benchtests/bench-strcasestr.c b/benchtests/bench-strcasestr.c
> index e6659ea79ec9b41427b8376f21b67dc326f60ec0..4337e0c18dbd5ed4a3743af51046da2dc6b6fe3a 100644
> --- a/benchtests/bench-strcasestr.c
> +++ b/benchtests/bench-strcasestr.c
> @@ -24,6 +24,7 @@
>  #define STRCASESTR simple_strcasestr
>  #define NO_ALIAS
>  #define __strncasecmp strncasecmp
> +#define __strnlen strnlen
>  #include "../string/strcasestr.c"
>  
>  
> diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c
> index c30cd1078586fe66c9177707ba603bb6ba0476a7..a31294e3c96d80a4fd61bb5b423a825fe54d3227 100644
> --- a/benchtests/bench-strstr.c
> +++ b/benchtests/bench-strstr.c
> @@ -23,6 +23,7 @@
>  
>  #define STRSTR simple_strstr
>  #define libc_hidden_builtin_def(X)
> +#define __strnlen strnlen
>  #include "../string/strstr.c"
>  
>  
> diff --git a/string/memmem.c b/string/memmem.c
> index c17e1cf6a63c0709df0c1c7a45410c5178b2876e..43efaa3fb718e7a22c3b4122c278720768644d0f 100644
> --- a/string/memmem.c
> +++ b/string/memmem.c
> @@ -31,6 +31,7 @@
>  
>  #define RETURN_TYPE void *
>  #define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
> +#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N))
>  #include "str-two-way.h"
>  
>  #undef memmem
> diff --git a/string/str-two-way.h b/string/str-two-way.h
> index cd2605857d9b5c432c3c2b1c5e35f55d8e9285b6..523d946c59412e1f1f65b8ec3778553b83191952 100644
> --- a/string/str-two-way.h
> +++ b/string/str-two-way.h
> @@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
>      }
>    else
>      {
> -      const unsigned char *phaystack = &haystack[suffix];
> +      const unsigned char *phaystack;
>        /* The comparison always starts from needle[suffix], so cache it
>  	 and use an optimized first-character loop.  */
>        unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
>  
> -#if CHECK_EOL
> -      /* We start matching from the SUFFIX'th element, so make sure we
> -	 don't hit '\0' before that.  */
> -      if (haystack_len < suffix + 1
> -	  && !AVAILABLE (haystack, haystack_len, 0, suffix + 1))
> -	return NULL;
> -#endif
> -
>        /* The two halves of needle are distinct; no extra memory is
>  	 required, and any mismatch results in a maximal shift.  */
>        period = MAX (suffix, needle_len - suffix) + 1;
>        j = 0;
> -      while (1
> -#if !CHECK_EOL
> -	     && AVAILABLE (haystack, haystack_len, j, needle_len)
> -#endif
> -	     )
> +      while (AVAILABLE (haystack, haystack_len, j, needle_len))
>  	{
>  	  unsigned char haystack_char;
>  	  const unsigned char *pneedle;
>  
> -	  /* TODO: The first-character loop can be sped up by adapting
> -	     longword-at-a-time implementation of memchr/strchr.  */
> -	  if (needle_suffix
> +	  phaystack = &haystack[suffix + j];
> +
> +#ifdef FASTSEARCH
> +	  if (*phaystack++ != needle_suffix)
> +	    {
> +	      phaystack = FASTSEARCH (phaystack, needle_suffix,
> +				      haystack_len - needle_len - j);
> +	      if (phaystack == NULL)
> +		goto ret0;
> +	      j = phaystack - &haystack[suffix];
> +	      phaystack++;
> +	    }
> +#else
> +	  while (needle_suffix
>  	      != (haystack_char = CANON_ELEMENT (*phaystack++)))
>  	    {
>  	      RET0_IF_0 (haystack_char);
> -#if !CHECK_EOL
> +# if !CHECK_EOL
>  	      ++j;
> -#endif
> -	      continue;
> +	      if (!AVAILABLE (haystack, haystack_len, j, needle_len))
> +		goto ret0;
> +# endif
>  	    }
>  
> -#if CHECK_EOL
> +# if CHECK_EOL
>  	  /* Calculate J if it wasn't kept up-to-date in the first-character
>  	     loop.  */
>  	  j = phaystack - &haystack[suffix] - 1;
> +# endif
>  #endif
> -
>  	  /* Scan for matches in right half.  */
>  	  i = suffix + 1;
>  	  pneedle = &needle[i];
> @@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
>  		}
>  	      ++i;
>  	    }
> +#if CHECK_EOL
> +	  /* Update minimal length of haystack.  */
> +	  if (phaystack > haystack + haystack_len)
> +	    haystack_len = phaystack - haystack;
> +#endif
>  	  if (needle_len <= i)
>  	    {
>  	      /* Scan for matches in left half.  */
> @@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
>  	    }
>  	  else
>  	    j += i - suffix + 1;
> -
> -#if CHECK_EOL
> -	  if (!AVAILABLE (haystack, haystack_len, j, needle_len))
> -	    break;
> -#endif
> -
> -	  phaystack = &haystack[suffix + j];
>  	}
>      }
>   ret0: __attribute__ ((unused))
> diff --git a/string/strcasestr.c b/string/strcasestr.c
> index 90ba189790481a58eb7627eb8c396530f988f985..5909fe3cdba88e476c7a989a020f3611bbfeb1de 100644
> --- a/string/strcasestr.c
> +++ b/string/strcasestr.c
> @@ -37,8 +37,8 @@
>  /* Two-Way algorithm.  */
>  #define RETURN_TYPE char *
>  #define AVAILABLE(h, h_l, j, n_l)			\
> -  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
> -   && ((h_l) = (j) + (n_l)))
> +  (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
> +			      (j) + (n_l) <= (h_l)))
>  #define CHECK_EOL (1)
>  #define RET0_IF_0(a) if (!a) goto ret0
>  #define CANON_ELEMENT(c) TOLOWER (c)
> diff --git a/string/strstr.c b/string/strstr.c
> index b3b5deb673758510616d32849bcf2fc15ce941cd..265e9f310ce507ce63740cc42d8ceea1d28dab01 100644
> --- a/string/strstr.c
> +++ b/string/strstr.c
> @@ -33,10 +33,11 @@
>  
>  #define RETURN_TYPE char *
>  #define AVAILABLE(h, h_l, j, n_l)			\
> -  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
> -   && ((h_l) = (j) + (n_l)))
> +  (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
> +			      (j) + (n_l) <= (h_l)))
>  #define CHECK_EOL (1)
>  #define RET0_IF_0(a) if (!a) goto ret0
> +#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C))
>  #include "str-two-way.h"
>  
>  #undef strstr
> diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c
> index 2b0a38ea734974ec389ad206874081a8b67d8066..9b1088df54c64b82348c555e16d35cf967e19dd9 100644
> --- a/string/test-strcasestr.c
> +++ b/string/test-strcasestr.c
> @@ -25,6 +25,7 @@
>  #define STRCASESTR simple_strcasestr
>  #define NO_ALIAS
>  #define __strncasecmp strncasecmp
> +#define __strnlen strnlen
>  #include "strcasestr.c"
>  
>  
> diff --git a/string/test-strstr.c b/string/test-strstr.c
> index acf6ff8224608737701046a421faed2be9f52f68..8d99716ff39cc2c22ebebdacb5f30438f9b32ffc 100644
> --- a/string/test-strstr.c
> +++ b/string/test-strstr.c
> @@ -24,6 +24,7 @@
>  
>  #define STRSTR simple_strstr
>  #define libc_hidden_builtin_def(arg) /* nothing */
> +#define __strnlen strnlen
>  #include "strstr.c"
>  
>  
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]