[PATCH v2 3/3] wcsmbs: Ensure wcstr worst-case linear execution time (BZ 23865)

DJ Delorie dj@redhat.com
Thu Feb 29 05:19:45 GMT 2024


Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> It uses the same two-way algorithm used on strstr, strcasestr, and
> memmem.  Different than strstr, neither the "shift table" optimization
> nor the self-adapting filtering check is used because it would result in
> a too-large shift table (and it also simplifies the implementation bit).

Question about the repeat loops and untimed tests.
Question about copyright years.
One typo.

> +static void
> +check3 (void)
> +{
> +  /* Check that a long periodic needle does not cause false positives.  */
> +  {
> +    const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"
> +                            "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD"
> +                            "_C3_A7_20_EF_BF_BD");
> +    const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, input, need, NULL);
> +  }

Ok.

> +  {
> +    const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"
> +			   "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD"
> +			   "_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20"
> +			   "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
> +    const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, input, need, (CHAR *) input + 115);
> +  }
> +}

Ok.

> +static void
> +pr23865 (void)
> +{
> +  /* Check that a very long haystack is handled quickly if the needle is
> +     short and occurs near the beginning.  */
> +  {
> +    size_t repeat = 10000;
> +    size_t m = 1000000;
> +    const CHAR *needle =
> +      L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
> +        "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
> +    CHAR *haystack = xmalloc ((m + 1) * sizeof (CHAR));
> +    MEMSET (haystack, L('A'), m);
> +    haystack[0] = L('B');
> +    haystack[m] = L('\0');
> +
> +    for (; repeat > 0; repeat--)
> +      {
> +	FOR_EACH_IMPL (impl, 0)
> +	  check_result (impl, haystack, needle, haystack + 1);
> +      }

What is the purpose of the repeat loop?  This function is only called
once, and not timed...

The test itself is OK.

> +    free (haystack);
> +  }
> +
> +  /* Check that a very long needle is discarded quickly if the haystack is
> +     short.  */
> +  {
> +    size_t repeat = 10000;
> +    size_t m = 1000000;
> +    const CHAR *haystack =
> +      L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
> +	"ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB");
> +    CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR));
> +    MEMSET (needle, L'A', m);
> +    needle[m] = L('\0');
> +
> +    for (; repeat > 0; repeat--)
> +      {
> +	FOR_EACH_IMPL (impl, 0)
> +	  check_result (impl, haystack, needle, NULL);
> +      }
> +
> +    free (needle);
> +  }

Test OK but question the repeat look.

> +  /* Check that the asymptotic worst-case complexity is not quadratic.  */
> +  {
> +    size_t m = 1000000;
> +    CHAR *haystack = xmalloc ((2 * m + 2) * sizeof (CHAR));
> +    CHAR *needle = xmalloc ((m + 2) * sizeof (CHAR));
> +
> +    MEMSET (haystack, L('A'), 2 * m);
> +    haystack[2 * m] = L('B');
> +    haystack[2 * m + 1] = L('\0');
> +
> +    MEMSET (needle, L('A'), m);
> +    needle[m] = L('B');
> +    needle[m + 1] = L('\0');
> +
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, haystack, needle, haystack + m);
> +
> +    free (needle);
> +    free (haystack);
> +  }

Without timing it, how do we test that it is not quadratic?

> +  {
> +    /* Ensure that with a barely periodic "short" needle, STRSTR's
> +       search does not mistakenly skip just past the match point.  */
> +    const CHAR *haystack =
> +      L("\n"
> +       "with_build_libsubdir\n"
> +       "with_local_prefix\n"
> +       "with_gxx_include_dir\n"
> +       "with_cpp_install_dir\n"
> +       "enable_generated_files_in_srcdir\n"
> +       "with_gnu_ld\n"
> +       "with_ld\n"
> +       "with_demangler_in_ld\n"
> +       "with_gnu_as\n"
> +       "with_as\n"
> +       "enable_largefile\n"
> +       "enable_werror_always\n"
> +       "enable_checking\n"
> +       "enable_coverage\n"
> +       "enable_gather_detailed_mem_stats\n"
> +       "enable_build_with_cxx\n"
> +       "with_stabs\n"
> +       "enable_multilib\n"
> +       "enable___cxa_atexit\n"
> +       "enable_decimal_float\n"
> +       "enable_fixed_point\n"
> +       "enable_threads\n"
> +       "enable_tls\n"
> +       "enable_objc_gc\n"
> +       "with_dwarf2\n"
> +       "enable_shared\n"
> +       "with_build_sysroot\n"
> +       "with_sysroot\n"
> +       "with_specs\n"
> +       "with_pkgversion\n"
> +       "with_bugurl\n"
> +       "enable_languages\n"
> +       "with_multilib_list\n");
> +    const CHAR *needle =
> +      L("\n"
> +       "with_gnu_ld\n");
> +
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, haystack, needle, (CHAR*) haystack + 114);
> +  }

Ok.

> +  {
> +    /* Same bug, shorter trigger.  */
> +    const CHAR *haystack = L("..wi.d.");
> +    const CHAR *needle = L(".d.");
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, haystack, needle, (CHAR*) haystack + 4);
> +  }

Ok.

> +  /* Test case from Yves Bastide.
> +     <https://www.openwall.com/lists/musl/2014/04/18/2>  */
> +  {
> +    const CHAR *input = L("playing play play play always");
> +    const CHAR *needle = L("play play play");
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, input, needle, (CHAR*) input + 8);
> +  }

Ok.

> +  /* Test long needles.  */
> +  {
> +    size_t m = 1024;
> +    CHAR *haystack = xmalloc ((2 * m + 1) * sizeof (CHAR));
> +    CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR));
> +    haystack[0] = L('x');
> +    MEMSET (haystack + 1, L(' '), m - 1);
> +    MEMSET (haystack + m, L('x'), m);
> +    haystack[2 * m] = L('\0');
> +    MEMSET (needle, L('x'), m);
> +    needle[m] = L('\0');
> +
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, haystack, needle, haystack + m);
> +
> +    free (needle);
> +    free (haystack);
> +  }
> +}

Ok.

> @@ -236,7 +411,9 @@ test_main (void)
>  
>    check1 ();
>    check2 ();
> +  check3 ();
>    pr23637 ();
> +  pr23865 ();
>  
>    printf ("%23s", "");
>    FOR_EACH_IMPL (impl, 0)

Ok.

> diff --git a/wcsmbs/wcs-two-way.h b/wcsmbs/wcs-two-way.h
> new file mode 100644
> index 0000000000..2dcee7fc1a
> --- /dev/null
> +++ b/wcsmbs/wcs-two-way.h
> @@ -0,0 +1,312 @@
> +/* Byte-wise substring search, using the Two-Way algorithm.
> +   Copyright (C) 2024 Free Software Foundation, Inc.

This appears to be an edited copy of string/str-two-way.h

If so, shouldn't the copyright years include the span from
string/str-two-way.h?  Also I noted that the author attribution has been
removed, but I assume that's glibc policy.

> +  /* Choose the shorted suffix.  Return the first character of the right

s/shorted/shorter/

> diff --git a/wcsmbs/wcsstr.c b/wcsmbs/wcsstr.c
> index 78f1cc9ce0..f97ea70010 100644
> --- a/wcsmbs/wcsstr.c
> +++ b/wcsmbs/wcsstr.c
> @@ -1,4 +1,5 @@
> -/* Copyright (C) 1995-2024 Free Software Foundation, Inc.
> +/* Locate a substring in a wide-character string.
> +   Copyright (C) 1995-2024 Free Software Foundation, Inc.

Ok.

>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> -/*
> - * The original strstr() file contains the following comment:
> - *
> - * My personal strstr() implementation that beats most other algorithms.
> - * Until someone tells me otherwise, I assume that this is the
> - * fastest implementation of strstr() in C.
> - * I deliberately chose not to comment it.  You should have at least
> - * as much fun trying to understand it, as I had to write it :-).
> - *
> - * Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de */
> -

Heh.  Ok.

>  #include <wchar.h>
> +#include <string.h>
> +
> +#define AVAILABLE(h, h_l, j, n_l)					\
> +  (((j) + (n_l) <= (h_l))						\
> +   || ((h_l) += __wcsnlen ((void*)((h) + (h_l)), (n_l) + 128),		\
> +       (j) + (n_l) <= (h_l)))
> +#include "wcs-two-way.h"

Ok.

>  wchar_t *
>  wcsstr (const wchar_t *haystack, const wchar_t *needle)
>  {
> -  wchar_t b, c;
> -
> . . .
> -  return (wchar_t*) haystack;
> -ret0:
> -  return NULL;
> +  /* Ensure haystack length is at least as long as needle length.
> +     Since a match may occur early on in a huge haystack, use strnlen
> +     and read ahead a few cachelines for improved performance.  */
> +  size_t ne_len = __wcslen (needle);
> +  size_t hs_len = __wcsnlen (haystack, ne_len | 128);
> +  if (hs_len < ne_len)
> +    return NULL;
> +
> +  /* Check whether we have a match.  This improves performance since we
> +     avoid initialization overheads.  */
> +  if (__wmemcmp (haystack, needle, ne_len) == 0)
> +    return (wchar_t *) haystack;
> +
> +  return two_way_short_needle (haystack, hs_len, needle, ne_len);
>  }

Ok.



More information about the Libc-alpha mailing list