This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH v2] Improve strsep
LGTM (I just replied to your older version and this one seems to have
corrected the remark I have made).
On 23/11/2016 17:07, Wilco Dijkstra wrote:
> This is version 2, rebased to be after the string2.h header changes
> https://sourceware.org/ml/libc-alpha/2016-11/msg00853.html
> (however like the earlier strtok patch there is no real dependency).
>
> It cleans up the strsep implementation and improves performance.
> Currently strsep calls strpbrk is is now a veneer to strcspn. Calling
> strcspn directly is faster. Since it handles a delimiter string of size
> 1 as a special case, this is not needed in strsep itself. Although this
> means there is a slightly higher overhead if the delimiter size is 1,
> all other cases are slightly faster. The overall performance gain is 5-10%
> on AArch64.
>
> The string/bits/string2.h header contains optimizations for constant
> delimiters of size 1-3. Benchmarking these showed similar performance for
> size 1 (since in all cases strchr/strchrnul is used), while size 2 and 3
> can give up to 2x speedup for small input strings. However if these cases
> are common it seems much better to add this optimization to strcspn.
> So move these header optimizations to string-inlines.c.
>
> Improve the strsep benchmark so that it actually benchmarks something.
> The current version contains a delimiter character at every position in the
> input string, so there is very little work to do, and the extremely inefficent
> simple_strsep implementation appears fastest in every case. The new version
> has either no match in the input for the fail case and a match halfway in the
> input for the success case. The input is then restored so that each iteration
> does exactly the same amount of work. Reduce the number of testcases since
> simple_strsep takes a lot of time now.
>
> Passes GLIBC tests, OK for commit?
>
> ChangeLog:
> 2015-11-23 Wilco Dijkstra <wdijkstr@arm.com>
>
> * benchtests/bench-strsep.c (oldstrsep): Add old implementation.
> (do_one_test) Restore original string so iteration works.
> * string/string-inlines.c (do_test): Create better input strings.
> (test_main) Reduce number of testruns.
> * string/string-inlines.c (__old_strsep_1c): New function.
> (__old_strsep_2c): Likewise.
> (__old_strsep_3c): Likewise.
> * string/strsep.c (__strsep): Remove case of small delim string.
> Call strcspn directly rather than strpbrk.
> * string/bits/string2.h (__strsep): Remove define.
> (__strsep_1c): Remove.
> (__strsep_2c): Remove.
> (__strsep_3c): Remove.
> (strsep): Remove.
>
> --
> diff --git a/benchtests/bench-strsep.c b/benchtests/bench-strsep.c
> index 70dbb377563232a43895abc940d396f2c65237c6..9e0c00d9f198776a3de4857446b82125566ba4b4 100644
> --- a/benchtests/bench-strsep.c
> +++ b/benchtests/bench-strsep.c
> @@ -49,10 +49,56 @@ simple_strsep (char **s1, char *s2)
> return begin;
> }
>
> +char *
> +oldstrsep (char **stringp, const char *delim)
> +{
> + char *begin, *end;
> +
> + begin = *stringp;
> + if (begin == NULL)
> + return NULL;
> +
> + /* A frequent case is when the delimiter string contains only one
> + character. Here we don't need to call the expensive `strpbrk'
> + function and instead work using `strchr'. */
> + if (delim[0] == '\0' || delim[1] == '\0')
> + {
> + char ch = delim[0];
> +
> + if (ch == '\0')
> + end = NULL;
> + else
> + {
> + if (*begin == ch)
> + end = begin;
> + else if (*begin == '\0')
> + end = NULL;
> + else
> + end = strchr (begin + 1, ch);
> + }
> + }
> + else
> + /* Find the end of the token. */
> + end = strpbrk (begin, delim);
> +
> + if (end)
> + {
> + /* Terminate the token and set *STRINGP past NUL character. */
> + *end++ = '\0';
> + *stringp = end;
> + }
> + else
> + /* No more delimiters; this is the last token. */
> + *stringp = NULL;
> +
> + return begin;
> +}
> +
> typedef char *(*proto_t) (const char **, const char *);
>
> IMPL (simple_strsep, 0)
> IMPL (strsep, 1)
> +IMPL (oldstrsep, 2)
>
> static void
> do_one_test (impl_t * impl, const char *s1, const char *s2)
> @@ -63,7 +109,10 @@ do_one_test (impl_t * impl, const char *s1, const char *s2)
> TIMING_NOW (start);
> for (i = 0; i < iters; ++i)
> {
> - CALL (impl, &s1, s2);
> + const char *s1a = s1;
> + CALL (impl, &s1a, s2);
> + if (s1a != NULL)
> + ((char*)s1a)[-1] = '1';
> }
> TIMING_NOW (stop);
>
> @@ -76,7 +125,7 @@ static void
> do_test (size_t align1, size_t align2, size_t len1, size_t len2, int fail)
> {
> char *s2 = (char *) (buf2 + align2);
> - static const char d[] = "1234567890abcdef";
> + static const char d[] = "1234567891abcdef";
> #define dl (sizeof (d) - 1)
> char *ss2 = s2;
> for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
> @@ -92,24 +141,9 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2, int fail)
> FOR_EACH_IMPL (impl, 0)
> {
> char *s1 = (char *) (buf1 + align1);
> - if (fail)
> - {
> - char *ss1 = s1;
> - for (size_t l = len1; l > 0; l = l > dl ? l - dl : 0)
> - {
> - size_t t = l > dl ? dl : l;
> - memcpy (ss1, d, t);
> - ++ss1[len2 > 7 ? 7 : len2 - 1];
> - ss1 += t;
> - }
> - }
> - else
> - {
> - memset (s1, '0', len1);
> - memcpy (s1 + (len1 - len2) - 2, s2, len2);
> - if ((len1 / len2) > 4)
> - memcpy (s1 + (len1 - len2) - (3 * len2), s2, len2);
> - }
> + memset (s1, '0', len1);
> + if (!fail)
> + s1[len1 / 2] = '1';
> s1[len1] = '\0';
> do_one_test (impl, s1, s2);
> }
> @@ -127,7 +161,7 @@ test_main (void)
> putchar ('\n');
>
> for (size_t klen = 2; klen < 32; ++klen)
> - for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen)
> + for (size_t hlen = 4 * klen; hlen < 8 * klen; hlen += klen)
> {
> do_test (0, 0, hlen, klen, 0);
> do_test (0, 0, hlen, klen, 1);
> diff --git a/string/bits/string2.h b/string/bits/string2.h
> index 5acdb7866e4e70a6c3aa050b0427b31aed8943ec..03af22cc27a33c81f36d56ddc52fce0a5ea81ece 100644
> --- a/string/bits/string2.h
> +++ b/string/bits/string2.h
> @@ -118,96 +118,6 @@
> #endif
>
>
> -#if !defined _HAVE_STRING_ARCH_strsep || defined _FORCE_INLINES
> -# ifndef _HAVE_STRING_ARCH_strsep
> -
> -extern char *__strsep_g (char **__stringp, const char *__delim);
> -# define __strsep(s, reject) \
> - __extension__ \
> - ({ char __r0, __r1, __r2; \
> - (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \
> - && (__r0 = ((const char *) (reject))[0], \
> - ((const char *) (reject))[0] != '\0') \
> - ? ((__r1 = ((const char *) (reject))[1], \
> - ((const char *) (reject))[1] == '\0') \
> - ? __strsep_1c (s, __r0) \
> - : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \
> - ? __strsep_2c (s, __r0, __r1) \
> - : (((const char *) (reject))[3] == '\0' \
> - ? __strsep_3c (s, __r0, __r1, __r2) \
> - : __strsep_g (s, reject)))) \
> - : __strsep_g (s, reject)); })
> -# endif
> -
> -__STRING_INLINE char *__strsep_1c (char **__s, char __reject);
> -__STRING_INLINE char *
> -__strsep_1c (char **__s, char __reject)
> -{
> - char *__retval = *__s;
> - if (__retval != NULL && (*__s = strchr (__retval, __reject)) != NULL)
> - *(*__s)++ = '\0';
> - return __retval;
> -}
> -
> -__STRING_INLINE char *__strsep_2c (char **__s, char __reject1, char __reject2);
> -__STRING_INLINE char *
> -__strsep_2c (char **__s, char __reject1, char __reject2)
> -{
> - char *__retval = *__s;
> - if (__retval != NULL)
> - {
> - char *__cp = __retval;
> - while (1)
> - {
> - if (*__cp == '\0')
> - {
> - __cp = NULL;
> - break;
> - }
> - if (*__cp == __reject1 || *__cp == __reject2)
> - {
> - *__cp++ = '\0';
> - break;
> - }
> - ++__cp;
> - }
> - *__s = __cp;
> - }
> - return __retval;
> -}
> -
> -__STRING_INLINE char *__strsep_3c (char **__s, char __reject1, char __reject2,
> - char __reject3);
> -__STRING_INLINE char *
> -__strsep_3c (char **__s, char __reject1, char __reject2, char __reject3)
> -{
> - char *__retval = *__s;
> - if (__retval != NULL)
> - {
> - char *__cp = __retval;
> - while (1)
> - {
> - if (*__cp == '\0')
> - {
> - __cp = NULL;
> - break;
> - }
> - if (*__cp == __reject1 || *__cp == __reject2 || *__cp == __reject3)
> - {
> - *__cp++ = '\0';
> - break;
> - }
> - ++__cp;
> - }
> - *__s = __cp;
> - }
> - return __retval;
> -}
> -# ifdef __USE_MISC
> -# define strsep(s, reject) __strsep (s, reject)
> -# endif
> -#endif
> -
> /* We need the memory allocation functions for inline strdup().
> Referring to stdlib.h (even minimally) is not allowed
> in any of the tight standards compliant modes. */
> diff --git a/string/string-inlines.c b/string/string-inlines.c
> index f6f56c56f875a89fbc05dec61a7603c2a869607c..6d0c0c51b3027f99d26aace85523d7f337df8308 100644
> --- a/string/string-inlines.c
> +++ b/string/string-inlines.c
> @@ -62,6 +62,70 @@ __old_strtok_r_1c (char *__s, char __sep, char **__nextp)
> return __result;
> }
> compat_symbol (libc, __old_strtok_r_1c, __strtok_r_1c, GLIBC_2_1_1);
> +
> +char *
> +__old_strsep_1c (char **__s, char __reject)
> +{
> + char *__retval = *__s;
> + if (__retval != NULL && (*__s = strchr (__retval, __reject)) != NULL)
> + *(*__s)++ = '\0';
> + return __retval;
> +}
> +compat_symbol (libc, __old_strsep_1c, __strsep_1c, GLIBC_2_1_1);
> +
> +char *
> +__old_strsep_2c (char **__s, char __reject1, char __reject2)
> +{
> + char *__retval = *__s;
> + if (__retval != NULL)
> + {
> + char *__cp = __retval;
> + while (1)
> + {
> + if (*__cp == '\0')
> + {
> + __cp = NULL;
> + break;
> + }
> + if (*__cp == __reject1 || *__cp == __reject2)
> + {
> + *__cp++ = '\0';
> + break;
> + }
> + ++__cp;
> + }
> + *__s = __cp;
> + }
> + return __retval;
> +}
> +compat_symbol (libc, __old_strsep_2c, __strsep_2c, GLIBC_2_1_1);
> +
> +char *
> +__old_strsep_3c (char **__s, char __reject1, char __reject2, char __reject3)
> +{
> + char *__retval = *__s;
> + if (__retval != NULL)
> + {
> + char *__cp = __retval;
> + while (1)
> + {
> + if (*__cp == '\0')
> + {
> + __cp = NULL;
> + break;
> + }
> + if (*__cp == __reject1 || *__cp == __reject2 || *__cp == __reject3)
> + {
> + *__cp++ = '\0';
> + break;
> + }
> + ++__cp;
> + }
> + *__s = __cp;
> + }
> + return __retval;
> +}
> +compat_symbol (libc, __old_strsep_3c, __strsep_3c, GLIBC_2_1_1);
> #endif
>
> #if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
> diff --git a/string/strsep.c b/string/strsep.c
> index 10547740481bed8f7a065f76eccfe7b320c3c49c..68581c863980c1e86ae8559400ca0d1cec80d95e 100644
> --- a/string/strsep.c
> +++ b/string/strsep.c
> @@ -29,30 +29,10 @@ __strsep (char **stringp, const char *delim)
> if (begin == NULL)
> return NULL;
>
> - /* A frequent case is when the delimiter string contains only one
> - character. Here we don't need to call the expensive `strpbrk'
> - function and instead work using `strchr'. */
> - if (delim[0] == '\0' || delim[1] == '\0')
> - {
> - char ch = delim[0];
> -
> - if (ch == '\0')
> - end = NULL;
> - else
> - {
> - if (*begin == ch)
> - end = begin;
> - else if (*begin == '\0')
> - end = NULL;
> - else
> - end = strchr (begin + 1, ch);
> - }
> - }
> - else
> - /* Find the end of the token. */
> - end = strpbrk (begin, delim);
> + /* Find the end of the token. */
> + end = begin + strcspn (begin, delim);
>
> - if (end)
> + if (*end)
> {
> /* Terminate the token and set *STRINGP past NUL character. */
> *end++ = '\0';
>