This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] Improve strtok(_r) performance
- From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- To: libc-alpha at sourceware dot org
- Date: Fri, 4 Nov 2016 10:54:53 -0200
- Subject: Re: [PATCH] Improve strtok(_r) performance
- Authentication-results: sourceware.org; auth=none
- References: <AM5PR0802MB2610F3F2F9864ECF0089F9E583AD0@AM5PR0802MB2610.eurprd08.prod.outlook.com>
On 28/10/2016 09:35, Wilco Dijkstra wrote:
> Improve strtok(_r) performance. Instead of calling strpbrk which calls
> strcspn, call strcspn directly so we get the end of the token without
> an extra call to rawmemchr. Also avoid an unnecessary call to strcspn after
> the last token by adding an early exit for an empty string. The result
> is a ~2x speedup of strtok on most inputs in bench-strtok.
>
> Passes regression tests, OK for commit?
Why not aim for simplicity and just implement strtok by calling strtok_r? It
should be a tail call on most architectures, and the performance loss should
be minimal.
Either way LGTM. I also found that the powerpc64-optimized version performs
worse than this new default one; once you push it in, I plan to
remove the powerpc64 one.
>
> ChangeLog:
> 2016-10-28 Wilco Dijkstra <wdijkstr@arm.com>
>
> * string/strtok.c (STRTOK): Optimize for performance.
> * string/strtok_r.c (__strtok_r): Likewise.
> --
>
> diff --git a/string/strtok.c b/string/strtok.c
> index 7a4574db5c80501e47d045ad4347e8a287b32191..b1ed48c24c8d20706b7d05481a138b18a01ff802 100644
> --- a/string/strtok.c
> +++ b/string/strtok.c
> @@ -38,11 +38,18 @@ static char *olds;
> char *
> STRTOK (char *s, const char *delim)
> {
> - char *token;
> + char *end;
>
> if (s == NULL)
> s = olds;
>
> + /* Return immediately at end of string. */
> + if (*s == '\0')
> + {
> + olds = s;
> + return NULL;
> + }
> +
> /* Scan leading delimiters. */
> s += strspn (s, delim);
> if (*s == '\0')
> @@ -52,16 +59,15 @@ STRTOK (char *s, const char *delim)
> }
>
> /* Find the end of the token. */
> - token = s;
> - s = strpbrk (token, delim);
> - if (s == NULL)
> - /* This token finishes the string. */
> - olds = __rawmemchr (token, '\0');
> - else
> + end = s + strcspn (s, delim);
> + if (*end == '\0')
> {
> - /* Terminate the token and make OLDS point past it. */
> - *s = '\0';
> - olds = s + 1;
> + olds = end;
> + return s;
> }
> - return token;
> +
> + /* Terminate the token and make OLDS point past it. */
> + *end = '\0';
> + olds = end + 1;
> + return s;
> }
> diff --git a/string/strtok_r.c b/string/strtok_r.c
> index f351304766108dad2c1cff881ad3bebae821b2a0..e049a5c82e026a3b6c1ba5da16ce81743717805e 100644
> --- a/string/strtok_r.c
> +++ b/string/strtok_r.c
> @@ -45,11 +45,17 @@
> char *
> __strtok_r (char *s, const char *delim, char **save_ptr)
> {
> - char *token;
> + char *end;
>
> if (s == NULL)
> s = *save_ptr;
>
> + if (*s == '\0')
> + {
> + *save_ptr = s;
> + return NULL;
> + }
> +
> /* Scan leading delimiters. */
> s += strspn (s, delim);
> if (*s == '\0')
> @@ -59,18 +65,17 @@ __strtok_r (char *s, const char *delim, char **save_ptr)
> }
>
> /* Find the end of the token. */
> - token = s;
> - s = strpbrk (token, delim);
> - if (s == NULL)
> - /* This token finishes the string. */
> - *save_ptr = __rawmemchr (token, '\0');
> - else
> + end = s + strcspn (s, delim);
> + if (*end == '\0')
> {
> - /* Terminate the token and make *SAVE_PTR point past it. */
> - *s = '\0';
> - *save_ptr = s + 1;
> + *save_ptr = end;
> + return s;
> }
> - return token;
> +
> + /* Terminate the token and make *SAVE_PTR point past it. */
> + *end = '\0';
> + *save_ptr = end + 1;
> + return s;
> }
> #ifdef weak_alias
> libc_hidden_def (__strtok_r)
>