[PATCH v2 3/3] wcsmbs: Ensure wcstr worst-case linear execution time (BZ 23865)
Adhemerval Zanella Netto
adhemerval.zanella@linaro.org
Fri Mar 1 16:35:19 GMT 2024
On 29/02/24 02:19, DJ Delorie wrote:
> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>> It uses the same two-way algorithm used on strstr, strcasestr, and
>> memmem. Different than strstr, neither the "shift table" optimization
>> nor the self-adapting filtering check is used because it would result in
>> a too-large shift table (and it also simplifies the implementation bit).
>
> Question about the repeat loops and untimed tests.
> Question about copyright years.
> One typo.
>
>> +static void
>> +check3 (void)
>> +{
>> + /* Check that a long periodic needle does not cause false positives. */
>> + {
>> + const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"
>> + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD"
>> + "_C3_A7_20_EF_BF_BD");
>> + const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, input, need, NULL);
>> + }
>
> Ok.
>
>> + {
>> + const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"
>> + "_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD"
>> + "_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20"
>> + "_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
>> + const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, input, need, (CHAR *) input + 115);
>> + }
>> +}
>
> Ok.
>
>> +static void
>> +pr23865 (void)
>> +{
>> + /* Check that a very long haystack is handled quickly if the needle is
>> + short and occurs near the beginning. */
>> + {
>> + size_t repeat = 10000;
>> + size_t m = 1000000;
>> + const CHAR *needle =
>> + L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
>> + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
>> + CHAR *haystack = xmalloc ((m + 1) * sizeof (CHAR));
>> + MEMSET (haystack, L('A'), m);
>> + haystack[0] = L('B');
>> + haystack[m] = L('\0');
>> +
>> + for (; repeat > 0; repeat--)
>> + {
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, haystack, needle, haystack + 1);
>> + }
>
> What is the purpose of the repeat loop? This function is only called
> once, and not timed...
>
> The test itself is OK.
I copied this tests from gnulib (9411c5e467cf60f6295b9fed306029f341a0f24f)
and my understanding is to enforce that wcsstr won't take too much time
by triggering a timeout. These kind of pattern are slower for newer
implementation compared to the current one.
The repeat number is indeed arbitrary and depending of the hardware should
not make any different (on my box I only see some different with way larger
inputs).
>
>> + free (haystack);
>> + }
>> +
>> + /* Check that a very long needle is discarded quickly if the haystack is
>> + short. */
>> + {
>> + size_t repeat = 10000;
>> + size_t m = 1000000;
>> + const CHAR *haystack =
>> + L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
>> + "ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB");
>> + CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR));
>> + MEMSET (needle, L'A', m);
>> + needle[m] = L('\0');
>> +
>> + for (; repeat > 0; repeat--)
>> + {
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, haystack, needle, NULL);
>> + }
>> +
>> + free (needle);
>> + }
>
> Test OK but question the repeat look.
>
>> + /* Check that the asymptotic worst-case complexity is not quadratic. */
>> + {
>> + size_t m = 1000000;
>> + CHAR *haystack = xmalloc ((2 * m + 2) * sizeof (CHAR));
>> + CHAR *needle = xmalloc ((m + 2) * sizeof (CHAR));
>> +
>> + MEMSET (haystack, L('A'), 2 * m);
>> + haystack[2 * m] = L('B');
>> + haystack[2 * m + 1] = L('\0');
>> +
>> + MEMSET (needle, L('A'), m);
>> + needle[m] = L('B');
>> + needle[m + 1] = L('\0');
>> +
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, haystack, needle, haystack + m);
>> +
>> + free (needle);
>> + free (haystack);
>> + }
>
> Without timing it, how do we test that it is not quadratic?
Because on current implementation it takes forever (about 4 minutes
on my box) due the input sizes.
>
>> + {
>> + /* Ensure that with a barely periodic "short" needle, STRSTR's
>> + search does not mistakenly skip just past the match point. */
>> + const CHAR *haystack =
>> + L("\n"
>> + "with_build_libsubdir\n"
>> + "with_local_prefix\n"
>> + "with_gxx_include_dir\n"
>> + "with_cpp_install_dir\n"
>> + "enable_generated_files_in_srcdir\n"
>> + "with_gnu_ld\n"
>> + "with_ld\n"
>> + "with_demangler_in_ld\n"
>> + "with_gnu_as\n"
>> + "with_as\n"
>> + "enable_largefile\n"
>> + "enable_werror_always\n"
>> + "enable_checking\n"
>> + "enable_coverage\n"
>> + "enable_gather_detailed_mem_stats\n"
>> + "enable_build_with_cxx\n"
>> + "with_stabs\n"
>> + "enable_multilib\n"
>> + "enable___cxa_atexit\n"
>> + "enable_decimal_float\n"
>> + "enable_fixed_point\n"
>> + "enable_threads\n"
>> + "enable_tls\n"
>> + "enable_objc_gc\n"
>> + "with_dwarf2\n"
>> + "enable_shared\n"
>> + "with_build_sysroot\n"
>> + "with_sysroot\n"
>> + "with_specs\n"
>> + "with_pkgversion\n"
>> + "with_bugurl\n"
>> + "enable_languages\n"
>> + "with_multilib_list\n");
>> + const CHAR *needle =
>> + L("\n"
>> + "with_gnu_ld\n");
>> +
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, haystack, needle, (CHAR*) haystack + 114);
>> + }
>
> Ok.
>
>> + {
>> + /* Same bug, shorter trigger. */
>> + const CHAR *haystack = L("..wi.d.");
>> + const CHAR *needle = L(".d.");
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, haystack, needle, (CHAR*) haystack + 4);
>> + }
>
> Ok.
>
>> + /* Test case from Yves Bastide.
>> + <https://www.openwall.com/lists/musl/2014/04/18/2> */
>> + {
>> + const CHAR *input = L("playing play play play always");
>> + const CHAR *needle = L("play play play");
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, input, needle, (CHAR*) input + 8);
>> + }
>
> Ok.
>
>> + /* Test long needles. */
>> + {
>> + size_t m = 1024;
>> + CHAR *haystack = xmalloc ((2 * m + 1) * sizeof (CHAR));
>> + CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR));
>> + haystack[0] = L('x');
>> + MEMSET (haystack + 1, L(' '), m - 1);
>> + MEMSET (haystack + m, L('x'), m);
>> + haystack[2 * m] = L('\0');
>> + MEMSET (needle, L('x'), m);
>> + needle[m] = L('\0');
>> +
>> + FOR_EACH_IMPL (impl, 0)
>> + check_result (impl, haystack, needle, haystack + m);
>> +
>> + free (needle);
>> + free (haystack);
>> + }
>> +}
>
> Ok.
>
>> @@ -236,7 +411,9 @@ test_main (void)
>>
>> check1 ();
>> check2 ();
>> + check3 ();
>> pr23637 ();
>> + pr23865 ();
>>
>> printf ("%23s", "");
>> FOR_EACH_IMPL (impl, 0)
>
> Ok.
>
>> diff --git a/wcsmbs/wcs-two-way.h b/wcsmbs/wcs-two-way.h
>> new file mode 100644
>> index 0000000000..2dcee7fc1a
>> --- /dev/null
>> +++ b/wcsmbs/wcs-two-way.h
>> @@ -0,0 +1,312 @@
>> +/* Byte-wise substring search, using the Two-Way algorithm.
>> + Copyright (C) 2024 Free Software Foundation, Inc.
>
> This appears to be an edited copy of string/str-two-way.h
>
> If so, shouldn't the copyright years include the span from
> string/str-two-way.h? Also I noted that the author attribution has been
> removed, but I assume that's glibc policy.
Yes, I decided to added a new file instead of parametrize str-two-way.h
because wcsstr only uses two_way_short_needle and to make
two_way_long_needle work correctly it would need a quite large shift_table.
And I remove the author as current glibc policy indeed.
I will update the Copyright years.
>
>> + /* Choose the shorted suffix. Return the first character of the right
>
> s/shorted/shorter/
>
Ack.
>> diff --git a/wcsmbs/wcsstr.c b/wcsmbs/wcsstr.c
>> index 78f1cc9ce0..f97ea70010 100644
>> --- a/wcsmbs/wcsstr.c
>> +++ b/wcsmbs/wcsstr.c
>> @@ -1,4 +1,5 @@
>> -/* Copyright (C) 1995-2024 Free Software Foundation, Inc.
>> +/* Locate a substring in a wide-character string.
>> + Copyright (C) 1995-2024 Free Software Foundation, Inc.
>
> Ok.
>
>> License along with the GNU C Library; if not, see
>> <https://www.gnu.org/licenses/>. */
>>
>> -/*
>> - * The original strstr() file contains the following comment:
>> - *
>> - * My personal strstr() implementation that beats most other algorithms.
>> - * Until someone tells me otherwise, I assume that this is the
>> - * fastest implementation of strstr() in C.
>> - * I deliberately chose not to comment it. You should have at least
>> - * as much fun trying to understand it, as I had to write it :-).
>> - *
>> - * Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de */
>> -
>
> Heh. Ok.
>
>> #include <wchar.h>
>> +#include <string.h>
>> +
>> +#define AVAILABLE(h, h_l, j, n_l) \
>> + (((j) + (n_l) <= (h_l)) \
>> + || ((h_l) += __wcsnlen ((void*)((h) + (h_l)), (n_l) + 128), \
>> + (j) + (n_l) <= (h_l)))
>> +#include "wcs-two-way.h"
>
> Ok.
>
>> wchar_t *
>> wcsstr (const wchar_t *haystack, const wchar_t *needle)
>> {
>> - wchar_t b, c;
>> -
>> . . .
>> - return (wchar_t*) haystack;
>> -ret0:
>> - return NULL;
>> + /* Ensure haystack length is at least as long as needle length.
>> + Since a match may occur early on in a huge haystack, use strnlen
>> + and read ahead a few cachelines for improved performance. */
>> + size_t ne_len = __wcslen (needle);
>> + size_t hs_len = __wcsnlen (haystack, ne_len | 128);
>> + if (hs_len < ne_len)
>> + return NULL;
>> +
>> + /* Check whether we have a match. This improves performance since we
>> + avoid initialization overheads. */
>> + if (__wmemcmp (haystack, needle, ne_len) == 0)
>> + return (wchar_t *) haystack;
>> +
>> + return two_way_short_needle (haystack, hs_len, needle, ne_len);
>> }
>
> Ok.
>
More information about the Libc-alpha
mailing list