This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[RFC] Improve wordexp performance.
- From: OndÅej BÃlka <neleai at seznam dot cz>
- To: libc-alpha at sourceware dot org
- Date: Wed, 13 May 2015 14:49:45 +0200
- Subject: [RFC] Improve wordexp performance.
- Authentication-results: sourceware.org; auth=none
Hi, as I read Paul's wordexp patch I found that you use inefficient
idiom. Checking character membership with strchr is slow due to startup
cost. Much better is just use table lookup.
This patch does just that.
You could generalize this to strchr in bits/string2.h with constant
haystack but code would be ugly
It is related to improvement on my todo list namely precompute strpbrk
skip table which is done unless you have sse42 thats faster than lookup
table approach.
* posix/wordexp.c: Precompute character tables.
diff --git a/posix/wordexp.c b/posix/wordexp.c
index e711d43..8939ca9 100644
--- a/posix/wordexp.c
+++ b/posix/wordexp.c
@@ -45,6 +45,35 @@
#include <bits/libc-lock.h>
#include <_itoa.h>
+
+static int
+in_charset (char c, uint32_t *charset)
+{
+ return charset[c / 32] & (1 << (c % 32));
+}
+
+static void
+make_charset (char *a, uint32_t *b)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ b[i] = 0;
+ while (*a)
+ {
+ b[*a / 32] |= 1 << (*a % 32);
+ a++;
+ }
+}
+
+struct ifss {
+ char *ifs;
+ char *ifs_white;
+ uint32_t ifs_table[8];
+ uint32_t ifs_white_table[8];
+};
+
+struct ifss nulifs;
+
/* Undefine the following line for the production version. */
/* #define NDEBUG 1 */
#include <assert.h>
@@ -63,18 +92,16 @@ extern char **__libc_argv attribute_hidden;
/* Some forward declarations */
static int parse_dollars (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags,
- wordexp_t *pwordexp, const char *ifs,
- const char *ifs_white, int quoted)
+ wordexp_t *pwordexp, struct ifss *ifsdata, int quoted)
internal_function;
static int parse_backtick (char **word, size_t *word_length,
size_t *max_length, const char *words,
size_t *offset, int flags, wordexp_t *pwordexp,
- const char *ifs, const char *ifs_white)
+ struct ifss *ifsdata)
internal_function;
static int parse_dquote (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags,
- wordexp_t *pwordexp, const char *ifs,
- const char *ifs_white)
+ wordexp_t *pwordexp, struct ifss *ifsdata)
internal_function;
static int eval_expr (char *expr, long int *result) internal_function;
@@ -381,8 +408,7 @@ parse_tilde (char **word, size_t *word_length, size_t *max_length,
static int
internal_function
do_parse_glob (const char *glob_word, char **word, size_t *word_length,
- size_t *max_length, wordexp_t *pwordexp, const char *ifs,
- const char *ifs_white)
+ size_t *max_length, wordexp_t *pwordexp, struct ifss *ifsdata)
{
int error;
unsigned int match;
@@ -397,7 +423,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
return WRDE_NOSPACE;
}
- if (ifs && !*ifs)
+ if (ifsdata->ifs && !*(ifsdata->ifs))
{
/* No field splitting allowed. */
assert (globbuf.gl_pathv[0] != NULL);
@@ -414,7 +440,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
return *word ? 0 : WRDE_NOSPACE;
}
- assert (ifs == NULL || *ifs != '\0');
+ assert (ifsdata->ifs == NULL || *(ifsdata->ifs) != '\0');
if (*word != NULL)
{
free (*word);
@@ -439,7 +465,7 @@ static int
internal_function
parse_glob (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags,
- wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+ wordexp_t *pwordexp, struct ifss *ifsdata)
{
/* We are poised just after a '*', a '[' or a '?'. */
int error = WRDE_NOSPACE;
@@ -452,7 +478,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
glob_list.we_offs = 0;
for (; words[*offset] != '\0'; ++*offset)
{
- if (strchr (ifs, words[*offset]) != NULL)
+ if (in_charset (words[*offset], ifsdata->ifs_table))
/* Reached IFS */
break;
@@ -488,7 +514,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
if (quoted != 1 && words[*offset] == '$')
{
error = parse_dollars (word, word_length, max_length, words,
- offset, flags, &glob_list, ifs, ifs_white,
+ offset, flags, &glob_list, ifsdata,
quoted == 2);
if (error)
goto tidy_up;
@@ -523,7 +549,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
*word = w_newword (word_length, max_length);
for (i = 0; error == 0 && i < glob_list.we_wordc; i++)
error = do_parse_glob (glob_list.we_wordv[i], word, word_length,
- max_length, pwordexp, ifs, ifs_white);
+ max_length, pwordexp, ifsdata);
/* Now tidy up */
tidy_up:
@@ -685,7 +711,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
{
case '$':
error = parse_dollars (&expr, &expr_length, &expr_maxlen,
- words, offset, flags, NULL, NULL, NULL, 1);
+ words, offset, flags, NULL, &nulifs, 1);
/* The ``1'' here is to tell parse_dollars not to
* split the fields.
*/
@@ -699,7 +725,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
case '`':
(*offset)++;
error = parse_backtick (&expr, &expr_length, &expr_maxlen,
- words, offset, flags, NULL, NULL, NULL);
+ words, offset, flags, NULL, &nulifs);
/* The first NULL here is to tell parse_backtick not to
* split the fields.
*/
@@ -884,8 +910,7 @@ exec_comm_child (char *comm, int *fildes, int showerr, int noexec)
static int
internal_function
exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
- int flags, wordexp_t *pwordexp, const char *ifs,
- const char *ifs_white)
+ int flags, wordexp_t *pwordexp, struct ifss *ifsdata)
{
int fildes[2];
#define bufsize 128
@@ -1010,10 +1035,10 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
for (i = 0; i < buflen; ++i)
{
- if (strchr (ifs, buffer[i]) != NULL)
+ if (in_charset (buffer[i], ifsdata->ifs_table))
{
/* Current character is IFS */
- if (strchr (ifs_white, buffer[i]) == NULL)
+ if (!in_charset (buffer[i], ifsdata->ifs_white_table))
{
/* Current character is IFS but not whitespace */
if (copying == 2)
@@ -1142,7 +1167,7 @@ static int
internal_function
parse_comm (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
- const char *ifs, const char *ifs_white)
+ struct ifss *ifsdata)
{
/* We are poised just after "$(" */
int paren_depth = 1;
@@ -1191,7 +1216,7 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
#endif
error = exec_comm (comm, word, word_length, max_length,
- flags, pwordexp, ifs, ifs_white);
+ flags, pwordexp, ifsdata);
#ifdef __libc_ptf_call
__libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
@@ -1222,14 +1247,12 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
return WRDE_SYNTAX;
}
-#define CHAR_IN_SET(ch, char_set) \
- (memchr (char_set "", ch, sizeof (char_set) - 1) != NULL)
static int
internal_function
parse_param (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
- const char *ifs, const char *ifs_white, int quoted)
+ struct ifss *ifsdata, int quoted)
{
/* We are poised just after "$" */
enum action
@@ -1269,6 +1292,8 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
if (brace)
++*offset;
+ uint32_t set1[8] = {0x0, 0x410, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0}; /* *@$ */
+
/* First collect the parameter name. */
if (words[*offset] == '#')
@@ -1306,7 +1331,7 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
}
while (isdigit(words[++*offset]));
}
- else if (CHAR_IN_SET (words[*offset], "*@$"))
+ else if (in_charset (words[*offset], set1))
{
/* Special parameter. */
special = 1;
@@ -1349,8 +1374,9 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
}
break;
- case ':':
- if (!CHAR_IN_SET (words[1 + *offset], "-=?+"))
+ case ':':;
+ uint32_t set2[8] = {0x0, 0xa0002800, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* -=?+ */
+ if (!in_charset (words[1 + *offset], set2))
goto syntax;
colon_seen = 1;
@@ -1648,7 +1674,7 @@ envsubst:
case '$':
offset = 0;
error = parse_dollars (&expanded, &exp_len, &exp_maxl, p,
- &offset, flags, NULL, NULL, NULL, 1);
+ &offset, flags, NULL, &nulifs, 1);
if (error)
{
if (free_value)
@@ -1991,22 +2017,22 @@ envsubst:
}
/* Skip IFS whitespace before the field */
- field_begin += strspn (field_begin, ifs_white);
+ field_begin += strspn (field_begin, ifsdata->ifs_white);
if (!seen_nonws_ifs && *field_begin == 0)
/* Nothing but whitespace */
break;
/* Search for the end of the field */
- field_end = field_begin + strcspn (field_begin, ifs);
+ field_end = field_begin + strcspn (field_begin, ifsdata->ifs);
/* Set up pointer to the character after end of field and
skip whitespace IFS after it. */
- next_field = field_end + strspn (field_end, ifs_white);
+ next_field = field_end + strspn (field_end, ifsdata->ifs_white);
/* Skip at most one non-whitespace IFS character after the field */
seen_nonws_ifs = 0;
- if (*next_field && strchr (ifs, *next_field))
+ if (*next_field && in_charset (*next_field, ifsdata->ifs_table))
{
seen_nonws_ifs = 1;
next_field++;
@@ -2052,13 +2078,12 @@ do_error:
return error;
}
-#undef CHAR_IN_SET
static int
internal_function
parse_dollars (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags,
- wordexp_t *pwordexp, const char *ifs, const char *ifs_white,
+ wordexp_t *pwordexp, struct ifss *ifsdata,
int quoted)
{
/* We are poised _at_ "$" */
@@ -2097,7 +2122,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
(*offset) += 2;
return parse_comm (word, word_length, max_length, words, offset, flags,
- quoted? NULL : pwordexp, ifs, ifs_white);
+ quoted? NULL : pwordexp, ifsdata);
case '[':
(*offset) += 2;
@@ -2109,7 +2134,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
default:
++(*offset); /* parse_param needs to know if "{" is there */
return parse_param (word, word_length, max_length, words, offset, flags,
- pwordexp, ifs, ifs_white, quoted);
+ pwordexp, ifsdata, quoted);
}
}
@@ -2117,7 +2142,7 @@ static int
internal_function
parse_backtick (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags,
- wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+ wordexp_t *pwordexp, struct ifss *ifsdata)
{
/* We are poised just after "`" */
int error;
@@ -2133,7 +2158,7 @@ parse_backtick (char **word, size_t *word_length, size_t *max_length,
case '`':
/* Go -- give the script to the shell */
error = exec_comm (comm, word, word_length, max_length, flags,
- pwordexp, ifs, ifs_white);
+ pwordexp, ifsdata);
free (comm);
return error;
@@ -2181,7 +2206,7 @@ static int
internal_function
parse_dquote (char **word, size_t *word_length, size_t *max_length,
const char *words, size_t *offset, int flags,
- wordexp_t *pwordexp, const char * ifs, const char * ifs_white)
+ wordexp_t *pwordexp, struct ifss *ifsdata)
{
/* We are poised just after a double-quote */
int error;
@@ -2195,7 +2220,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
case '$':
error = parse_dollars (word, word_length, max_length, words, offset,
- flags, pwordexp, ifs, ifs_white, 1);
+ flags, pwordexp, ifsdata, 1);
/* The ``1'' here is to tell parse_dollars not to
* split the fields. It may need to, however ("$@").
*/
@@ -2207,7 +2232,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
case '`':
++(*offset);
error = parse_backtick (word, word_length, max_length, words,
- offset, flags, NULL, NULL, NULL);
+ offset, flags, NULL, &nulifs);
/* The first NULL here is to tell parse_backtick not to
* split the fields.
*/
@@ -2340,6 +2365,13 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
*whch = '\0';
}
+ struct ifss ifsdata;
+ ifsdata.ifs = ifs;
+ ifsdata.ifs_white = ifs_white;
+ make_charset (ifs, ifsdata.ifs_table);
+ make_charset (ifs_white, ifsdata.ifs_white_table);
+
+
for (words_offset = 0 ; words[words_offset] ; ++words_offset)
switch (words[words_offset])
{
@@ -2354,7 +2386,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
case '$':
error = parse_dollars (&word, &word_length, &max_length, words,
- &words_offset, flags, pwordexp, ifs, ifs_white,
+ &words_offset, flags, pwordexp, &ifsdata,
0);
if (error)
@@ -2365,8 +2397,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
case '`':
++words_offset;
error = parse_backtick (&word, &word_length, &max_length, words,
- &words_offset, flags, pwordexp, ifs,
- ifs_white);
+ &words_offset, flags, pwordexp, &ifsdata);
if (error)
goto do_error;
@@ -2376,7 +2407,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
case '"':
++words_offset;
error = parse_dquote (&word, &word_length, &max_length, words,
- &words_offset, flags, pwordexp, ifs, ifs_white);
+ &words_offset, flags, pwordexp, &ifsdata);
if (error)
goto do_error;
@@ -2422,21 +2453,24 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
case '[':
case '?':
error = parse_glob (&word, &word_length, &max_length, words,
- &words_offset, flags, pwordexp, ifs, ifs_white);
+ &words_offset, flags, pwordexp, &ifsdata);
if (error)
goto do_error;
break;
- default:
+ default:;
/* Is it a word separator? */
- if (strchr (" \t", words[words_offset]) == NULL)
- {
- char ch = words[words_offset];
+ char ch = words[words_offset];
+ uint32_t set3[8] = {0x200, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* \t */
+ if (!in_charset (ch, set3))
+ {
/* Not a word separator -- but is it a valid word char? */
- if (strchr ("\n|&;<>(){}", ch))
+ uint32_t set4[8] = {0x400, 0x58000340, 0x0, 0x38000000, 0x0, 0x0,
+ 0x0, 0x0}; /* \n|&;<>(){} */
+ if (in_charset (ch, set4))
{
/* Fail */
error = WRDE_BADCHAR;