This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v2] Improve bench-strstr



On 18/03/2019 13:58, Wilco Dijkstra wrote:
> ping
>   
> 
> Hi Adhemerval,
> 
>> My point is now that you added a twoway implementation on bench-strstr,
>> I see to no point in continue to include the generic string/strstr.c
>> since we also check the one libc.so provide (strstr symbol itself).
>> So the my suggestion is just to remove the inclusion of generic
>> implementation.
> 
> Well I can leave that out easily, see updated v3 patch below.
> 
>> If we decided to use a different algorithm on strstr we add this
>> on bench-strstr as well.
> 
> Sure, we could also add other alternative algorithms like the QuickSearch
> variant I proposed.
> 
> Wilco
> 
> v3: removes generic_strstr variant, add comments
> 
> [PATCH v3] Improve bench-strstr 
> 
> Improve bench-strstr by using an extract from the manual as the input
> to make the test more realistic.  Use the same input for both found and
> fail cases rather than using a memset of '0' for most of the string,
> which measures performance of strchr rather than strstr.  Add result
> checking to catch potential errors.  Remove the repeated tests at slightly
> different alignments and add more large needle and haystack testcases.
> 
> Replace stupid_strstr with an efficient basic implementation.  Add the
> Two-way implementation to simplify comparisons with much faster generic
> implementations.
> 
> ChangeLog:
> 2019-02-06  Wilco Dijkstra  <wdijkstr@arm.com>
> 
>         * benchtests/bench-strstr.c (input): Add realistic input text.
>         (stupid_strstr): Remove function.
>         (basic_strstr): Add function.
>         (twoway_strstr): Add function.
>         (do_one_test): Add result checking.
>         (do_test): Use new input text.  Remove accidental early matches.
>         (test_main): Improve range of tests, reduce unaligned cases.

LGTM, thanks.

> --
> 
> diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c
> index 2e6781291dbf6057b252657222655f63d6106061..31309b24029a96de7381e1050fd89e5d26642e5f 100644
> --- a/benchtests/bench-strstr.c
> +++ b/benchtests/bench-strstr.c
> @@ -16,63 +16,140 @@
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
>  
> +#define MIN_PAGE_SIZE 131072
>  #define TEST_MAIN
>  #define TEST_NAME "strstr"
>  #include "bench-string.h"
>  
> -
> -#define STRSTR simple_strstr
> -#define libc_hidden_builtin_def(X)
> -#define __strnlen strnlen
> -#include "../string/strstr.c"
> -
> -
> +static const char input[] =
> +"This manual is written with the assumption that you are at least "
> +"somewhat familiar with the C programming language and basic programming "
> +"concepts.  Specifically, familiarity with ISO standard C (*note ISO "
> +"C::), rather than “traditional” pre-ISO C dialects, is assumed.\n"
> +
> +"   The GNU C Library includes several “header files”, each of which "
> +"provides definitions and declarations for a group of related facilities; "
> +"this information is used by the C compiler when processing your program. "
> +"For example, the header file ‘stdio.h’ declares facilities for "
> +"performing input and output, and the header file ‘string.h’ declares "
> +"string processing utilities.  The organization of this manual generally "
> +"follows the same division as the header files.\n"
> +
> +"   If you are reading this manual for the first time, you should read "
> +"all of the introductory material and skim the remaining chapters.  There "
> +"are a _lot_ of functions in the GNU C Library and it’s not realistic to "
> +"expect that you will be able to remember exactly _how_ to use each and "
> +"every one of them.  It’s more important to become generally familiar "
> +"with the kinds of facilities that the library provides, so that when you "
> +"are writing your programs you can recognize _when_ to make use of "
> +"library functions, and _where_ in this manual you can find more specific "
> +"information about them.\n";
> +
> +/* Simple yet efficient strstr - for needles < 32 bytes it is 2-4 times
> +   faster than the optimized twoway_strstr.  */
>  static char *
> -stupid_strstr (const char *s1, const char *s2)
> +basic_strstr (const char *s1, const char *s2)
>  {
> -  ssize_t s1len = strlen (s1);
> -  ssize_t s2len = strlen (s2);
> +  size_t i;
> +  int c = s2[0];
>  
> -  if (s2len > s1len)
> -    return NULL;
> +  if (c == 0)
> +    return (char*)s1;
>  
> -  for (ssize_t i = 0; i <= s1len - s2len; ++i)
> +  for ( ; s1[0] != '\0'; s1++)
>      {
> -      size_t j;
> -      for (j = 0; j < s2len; ++j)
> -       if (s1[i + j] != s2[j])
> +      if (s1[0] != c)
> +       continue;
> +      for (i = 1; s2[i] != 0; i++)
> +       if (s1[i] != s2[i])
>            break;
> -      if (j == s2len)
> -       return (char *) s1 + i;
> +      if (s2[i] == '\0')
> +       return (char*)s1;
>      }
>  
>    return NULL;
>  }
>  
> +#define RETURN_TYPE char *
> +#define AVAILABLE(h, h_l, j, n_l)                      \
> +  (((j) + (n_l) <= (h_l)) \
> +   || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
> +       (j) + (n_l) <= (h_l)))
> +#define CHECK_EOL (1)
> +#define RET0_IF_0(a) if (!a) goto ret0
> +#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C))
> +#define LONG_NEEDLE_THRESHOLD 32U
> +#define __strnlen strnlen
> +#include "string/str-two-way.h"
> +
> +/* Optimized Two-way implementation from GLIBC 2.29.  */
> +static char *
> +twoway_strstr (const char *haystack, const char *needle)
> +{
> +  size_t needle_len; /* Length of NEEDLE.  */
> +  size_t haystack_len; /* Known minimum length of HAYSTACK.  */
> +
> +  /* Handle empty NEEDLE special case.  */
> +  if (needle[0] == '\0')
> +    return (char *) haystack;
> +
> +  /* Skip until we find the first matching char from NEEDLE.  */
> +  haystack = strchr (haystack, needle[0]);
> +  if (haystack == NULL || needle[1] == '\0')
> +    return (char *) haystack;
> +
> +  /* Ensure HAYSTACK length is at least as long as NEEDLE length.
> +     Since a match may occur early on in a huge HAYSTACK, use strnlen
> +     and read ahead a few cachelines for improved performance.  */
> +  needle_len = strlen (needle);
> +  haystack_len = __strnlen (haystack, needle_len + 256);
> +  if (haystack_len < needle_len)
> +    return NULL;
> +
> +  /* Check whether we have a match.  This improves performance since we avoid
> +     the initialization overhead of the two-way algorithm.  */
> +  if (memcmp (haystack, needle, needle_len) == 0)
> +    return (char *) haystack;
> +
> +  /* Perform the search.  Abstract memory is considered to be an array
> +     of 'unsigned char' values, not an array of 'char' values.  See
> +     ISO C 99 section 6.2.6.1.  */
> +  if (needle_len < LONG_NEEDLE_THRESHOLD)
> +    return two_way_short_needle ((const unsigned char *) haystack,
> +                                 haystack_len,
> +                                (const unsigned char *) needle, needle_len);
> +  return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
> +                             (const unsigned char *) needle, needle_len);
> +}
>  
>  typedef char *(*proto_t) (const char *, const char *);
>  
> -IMPL (stupid_strstr, 0)
> -IMPL (simple_strstr, 0)
>  IMPL (strstr, 1)
> -
> +IMPL (twoway_strstr, 0)
> +IMPL (basic_strstr, 0)
>  
>  static void
>  do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
>  {
>    size_t i, iters = INNER_LOOP_ITERS;
>    timing_t start, stop, cur;
> +  char *res;
>  
>    TIMING_NOW (start);
>    for (i = 0; i < iters; ++i)
> -    {
> -      CALL (impl, s1, s2);
> -    }
> +    res = CALL (impl, s1, s2);
>    TIMING_NOW (stop);
>  
>    TIMING_DIFF (cur, start, stop);
>  
>    TIMING_PRINT_MEAN ((double) cur, (double) iters);
> +
> +  if (res != exp_result)
> +    {
> +      error (0, 0, "Wrong result in function %s %s %s", impl->name,
> +            res, exp_result);
> +      ret = 1;
> +    }
>  }
>  
>  
> @@ -83,36 +160,42 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
>    char *s1 = (char *) (buf1 + align1);
>    char *s2 = (char *) (buf2 + align2);
>  
> -  static const char d[] = "1234567890abcdef";
> -#define dl (sizeof (d) - 1)
> -  char *ss2 = s2;
> -  for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
> -    {
> -      size_t t = l > dl ? dl : l;
> -      ss2 = mempcpy (ss2, d, t);
> -    }
> -  s2[len2] = '\0';
> +  size_t size = sizeof (input) - 1;
> +  size_t pos = (len1 + len2) % size;
>  
> -  if (fail)
> +  char *ss2 = s2;
> +  for (size_t l = len2; l > 0; l = l > size ? l - size : 0)
>      {
> -      char *ss1 = s1;
> -      for (size_t l = len1; l > 0; l = l > dl ? l - dl : 0)
> +      size_t t = l > size ? size : l;
> +      if (pos + t <= size)
> +       ss2 = mempcpy (ss2, input + pos, t);
> +      else
>          {
> -         size_t t = l > dl ? dl : l;
> -         memcpy (ss1, d, t);
> -         ++ss1[len2 > 7 ? 7 : len2 - 1];
> -         ss1 += t;
> +         ss2 = mempcpy (ss2, input + pos, size - pos);
> +         ss2 = mempcpy (ss2, input, t - (size - pos));
>          }
>      }
> -  else
> +  s2[len2] = '\0';
> +
> +  char *ss1 = s1;
> +  for (size_t l = len1; l > 0; l = l > size ? l - size : 0)
>      {
> -      memset (s1, '0', len1);
> -      memcpy (s1 + len1 - len2, s2, len2);
> +      size_t t = l > size ? size : l;
> +      memcpy (ss1, input, t);
> +      ss1 += t;
>      }
> +
> +  if (!fail)
> +    memcpy (s1 + len1 - len2, s2, len2);
>    s1[len1] = '\0';
>  
> -  printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
> -         len1, len2, align1, align2, fail ? "fail" : "found");
> +  /* Remove any accidental matches except for the last if !fail.  */
> +  for (ss1 = basic_strstr (s1, s2); ss1; ss1 = basic_strstr (ss1 + 1, s2))
> +    if (fail || ss1 != s1 + len1 - len2)
> +      ++ss1[len2 / 2];
> +
> +  printf ("Length %4zd/%3zd, alignment %2zd/%2zd, %s:",
> +         len1, len2, align1, align2, fail ? "fail " : "found");
>  
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);
> @@ -130,48 +213,19 @@ test_main (void)
>      printf ("\t%s", impl->name);
>    putchar ('\n');
>  
> -  for (size_t klen = 2; klen < 32; ++klen)
> -    for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen)
> +  for (size_t hlen = 64; hlen <= 256; hlen += 32)
> +    for (size_t klen = 1; klen <= 16; klen++)
>        {
> -       do_test (0, 0, hlen, klen, 0);
> -       do_test (0, 0, hlen, klen, 1);
> -       do_test (0, 3, hlen, klen, 0);
> -       do_test (0, 3, hlen, klen, 1);
> -       do_test (0, 9, hlen, klen, 0);
> +       do_test (1, 3, hlen, klen, 0);
>          do_test (0, 9, hlen, klen, 1);
> -       do_test (0, 15, hlen, klen, 0);
> -       do_test (0, 15, hlen, klen, 1);
> -
> -       do_test (3, 0, hlen, klen, 0);
> -       do_test (3, 0, hlen, klen, 1);
> -       do_test (3, 3, hlen, klen, 0);
> -       do_test (3, 3, hlen, klen, 1);
> -       do_test (3, 9, hlen, klen, 0);
> -       do_test (3, 9, hlen, klen, 1);
> -       do_test (3, 15, hlen, klen, 0);
> -       do_test (3, 15, hlen, klen, 1);
> -
> -       do_test (9, 0, hlen, klen, 0);
> -       do_test (9, 0, hlen, klen, 1);
> -       do_test (9, 3, hlen, klen, 0);
> -       do_test (9, 3, hlen, klen, 1);
> -       do_test (9, 9, hlen, klen, 0);
> -       do_test (9, 9, hlen, klen, 1);
> -       do_test (9, 15, hlen, klen, 0);
> -       do_test (9, 15, hlen, klen, 1);
> -
> -       do_test (15, 0, hlen, klen, 0);
> -       do_test (15, 0, hlen, klen, 1);
> -       do_test (15, 3, hlen, klen, 0);
> -       do_test (15, 3, hlen, klen, 1);
> -       do_test (15, 9, hlen, klen, 0);
> -       do_test (15, 9, hlen, klen, 1);
> -       do_test (15, 15, hlen, klen, 0);
> -       do_test (15, 15, hlen, klen, 1);
>        }
>  
> -  do_test (0, 0, page_size - 1, 16, 0);
> -  do_test (0, 0, page_size - 1, 16, 1);
> +  for (size_t hlen = 256; hlen <= 65536; hlen *= 2)
> +    for (size_t klen = 16; klen <= 256; klen *= 2)
> +      {
> +       do_test (1, 11, hlen, klen, 0);
> +       do_test (14, 5, hlen, klen, 1);
> +      }
>  
>    return ret;
>  }
>     
>     
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]