This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] Improve wordexp performance.


Hi, as I read Paul's wordexp patch I found that you use inefficient
idiom. Checking character membership with strchr is slow due to startup
cost. Much better is just use table lookup.

This patch does just that.

You could generalize this to strchr in bits/string2.h with constant
haystack but code would be ugly

It is related to improvement on my todo list namely precompute strpbrk
skip table which is done unless you have sse42 thats faster than lookup
table approach.

	* posix/wordexp.c: Precompute character tables.


diff --git a/posix/wordexp.c b/posix/wordexp.c
index e711d43..8939ca9 100644
--- a/posix/wordexp.c
+++ b/posix/wordexp.c
@@ -45,6 +45,35 @@
 #include <bits/libc-lock.h>
 #include <_itoa.h>
 
+
+static int
+in_charset (char c, uint32_t *charset)
+{
+  return charset[c / 32] & (1 << (c % 32));
+}
+
+static void 
+make_charset (char *a, uint32_t *b)
+{
+  int i;
+  for (i = 0; i < 8; i++)
+    b[i] = 0;   
+  while (*a)
+   {
+     b[*a / 32] |= 1 << (*a % 32);
+     a++;
+   }
+}
+
+struct ifss {
+  char *ifs;
+  char *ifs_white;
+  uint32_t ifs_table[8];
+  uint32_t ifs_white_table[8];
+};
+
+struct ifss nulifs;
+
 /* Undefine the following line for the production version.  */
 /* #define NDEBUG 1 */
 #include <assert.h>
@@ -63,18 +92,16 @@ extern char **__libc_argv attribute_hidden;
 /* Some forward declarations */
 static int parse_dollars (char **word, size_t *word_length, size_t *max_length,
 			  const char *words, size_t *offset, int flags,
-			  wordexp_t *pwordexp, const char *ifs,
-			  const char *ifs_white, int quoted)
+			  wordexp_t *pwordexp, struct ifss *ifsdata, int quoted)
      internal_function;
 static int parse_backtick (char **word, size_t *word_length,
 			   size_t *max_length, const char *words,
 			   size_t *offset, int flags, wordexp_t *pwordexp,
-			   const char *ifs, const char *ifs_white)
+			   struct ifss *ifsdata)
      internal_function;
 static int parse_dquote (char **word, size_t *word_length, size_t *max_length,
 			 const char *words, size_t *offset, int flags,
-			 wordexp_t *pwordexp, const char *ifs,
-			 const char *ifs_white)
+			 wordexp_t *pwordexp, struct ifss *ifsdata)
      internal_function;
 static int eval_expr (char *expr, long int *result) internal_function;
 
@@ -381,8 +408,7 @@ parse_tilde (char **word, size_t *word_length, size_t *max_length,
 static int
 internal_function
 do_parse_glob (const char *glob_word, char **word, size_t *word_length,
-	       size_t *max_length, wordexp_t *pwordexp, const char *ifs,
-	       const char *ifs_white)
+	       size_t *max_length, wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   int error;
   unsigned int match;
@@ -397,7 +423,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
       return WRDE_NOSPACE;
     }
 
-  if (ifs && !*ifs)
+  if (ifsdata->ifs && !*(ifsdata->ifs))
     {
       /* No field splitting allowed. */
       assert (globbuf.gl_pathv[0] != NULL);
@@ -414,7 +440,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
       return *word ? 0 : WRDE_NOSPACE;
     }
 
-  assert (ifs == NULL || *ifs != '\0');
+  assert (ifsdata->ifs == NULL || *(ifsdata->ifs) != '\0');
   if (*word != NULL)
     {
       free (*word);
@@ -439,7 +465,7 @@ static int
 internal_function
 parse_glob (char **word, size_t *word_length, size_t *max_length,
 	    const char *words, size_t *offset, int flags,
-	    wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+	    wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after a '*', a '[' or a '?'. */
   int error = WRDE_NOSPACE;
@@ -452,7 +478,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
   glob_list.we_offs = 0;
   for (; words[*offset] != '\0'; ++*offset)
     {
-      if (strchr (ifs, words[*offset]) != NULL)
+      if (in_charset (words[*offset], ifsdata->ifs_table))
 	/* Reached IFS */
 	break;
 
@@ -488,7 +514,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
       if (quoted != 1 && words[*offset] == '$')
 	{
 	  error = parse_dollars (word, word_length, max_length, words,
-				 offset, flags, &glob_list, ifs, ifs_white,
+				 offset, flags, &glob_list, ifsdata,
 				 quoted == 2);
 	  if (error)
 	    goto tidy_up;
@@ -523,7 +549,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
   *word = w_newword (word_length, max_length);
   for (i = 0; error == 0 && i < glob_list.we_wordc; i++)
     error = do_parse_glob (glob_list.we_wordv[i], word, word_length,
-			   max_length, pwordexp, ifs, ifs_white);
+			   max_length, pwordexp, ifsdata);
 
   /* Now tidy up */
 tidy_up:
@@ -685,7 +711,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
 	{
 	case '$':
 	  error = parse_dollars (&expr, &expr_length, &expr_maxlen,
-				 words, offset, flags, NULL, NULL, NULL, 1);
+				 words, offset, flags, NULL, &nulifs, 1);
 	  /* The ``1'' here is to tell parse_dollars not to
 	   * split the fields.
 	   */
@@ -699,7 +725,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  (*offset)++;
 	  error = parse_backtick (&expr, &expr_length, &expr_maxlen,
-				  words, offset, flags, NULL, NULL, NULL);
+				  words, offset, flags, NULL, &nulifs);
 	  /* The first NULL here is to tell parse_backtick not to
 	   * split the fields.
 	   */
@@ -884,8 +910,7 @@ exec_comm_child (char *comm, int *fildes, int showerr, int noexec)
 static int
 internal_function
 exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
-	   int flags, wordexp_t *pwordexp, const char *ifs,
-	   const char *ifs_white)
+	   int flags, wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   int fildes[2];
 #define bufsize 128
@@ -1010,10 +1035,10 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
 
 	  for (i = 0; i < buflen; ++i)
 	    {
-	      if (strchr (ifs, buffer[i]) != NULL)
+	      if (in_charset (buffer[i], ifsdata->ifs_table))
 		{
 		  /* Current character is IFS */
-		  if (strchr (ifs_white, buffer[i]) == NULL)
+		  if (!in_charset (buffer[i], ifsdata->ifs_white_table))
 		    {
 		      /* Current character is IFS but not whitespace */
 		      if (copying == 2)
@@ -1142,7 +1167,7 @@ static int
 internal_function
 parse_comm (char **word, size_t *word_length, size_t *max_length,
 	    const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
-	    const char *ifs, const char *ifs_white)
+	    struct ifss *ifsdata)
 {
   /* We are poised just after "$(" */
   int paren_depth = 1;
@@ -1191,7 +1216,7 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
 #endif
 
 		  error = exec_comm (comm, word, word_length, max_length,
-				     flags, pwordexp, ifs, ifs_white);
+				     flags, pwordexp, ifsdata);
 
 #ifdef __libc_ptf_call
 		  __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
@@ -1222,14 +1247,12 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
   return WRDE_SYNTAX;
 }
 
-#define CHAR_IN_SET(ch, char_set) \
-  (memchr (char_set "", ch, sizeof (char_set) - 1) != NULL)
 
 static int
 internal_function
 parse_param (char **word, size_t *word_length, size_t *max_length,
 	     const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
-	     const char *ifs, const char *ifs_white, int quoted)
+	     struct ifss *ifsdata, int quoted)
 {
   /* We are poised just after "$" */
   enum action
@@ -1269,6 +1292,8 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
   if (brace)
     ++*offset;
 
+  uint32_t set1[8] = {0x0, 0x410, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0}; /* *@$ */
+
   /* First collect the parameter name. */
 
   if (words[*offset] == '#')
@@ -1306,7 +1331,7 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
 	}
       while (isdigit(words[++*offset]));
     }
-  else if (CHAR_IN_SET (words[*offset], "*@$"))
+  else if (in_charset (words[*offset], set1))
     {
       /* Special parameter. */
       special = 1;
@@ -1349,8 +1374,9 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
 	    }
 	  break;
 
-	case ':':
-	  if (!CHAR_IN_SET (words[1 + *offset], "-=?+"))
+	case ':':;
+          uint32_t set2[8] = {0x0, 0xa0002800, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* -=?+ */
+	  if (!in_charset (words[1 + *offset], set2))
 	    goto syntax;
 
 	  colon_seen = 1;
@@ -1648,7 +1674,7 @@ envsubst:
 		case '$':
 		  offset = 0;
 		  error = parse_dollars (&expanded, &exp_len, &exp_maxl, p,
-					 &offset, flags, NULL, NULL, NULL, 1);
+					 &offset, flags, NULL, &nulifs, 1);
 		  if (error)
 		    {
 		      if (free_value)
@@ -1991,22 +2017,22 @@ envsubst:
 	    }
 
 	  /* Skip IFS whitespace before the field */
-	  field_begin += strspn (field_begin, ifs_white);
+	  field_begin += strspn (field_begin, ifsdata->ifs_white);
 
 	  if (!seen_nonws_ifs && *field_begin == 0)
 	    /* Nothing but whitespace */
 	    break;
 
 	  /* Search for the end of the field */
-	  field_end = field_begin + strcspn (field_begin, ifs);
+	  field_end = field_begin + strcspn (field_begin, ifsdata->ifs);
 
 	  /* Set up pointer to the character after end of field and
 	     skip whitespace IFS after it. */
-	  next_field = field_end + strspn (field_end, ifs_white);
+	  next_field = field_end + strspn (field_end, ifsdata->ifs_white);
 
 	  /* Skip at most one non-whitespace IFS character after the field */
 	  seen_nonws_ifs = 0;
-	  if (*next_field && strchr (ifs, *next_field))
+	  if (*next_field && in_charset (*next_field, ifsdata->ifs_table))
 	    {
 	      seen_nonws_ifs = 1;
 	      next_field++;
@@ -2052,13 +2078,12 @@ do_error:
   return error;
 }
 
-#undef CHAR_IN_SET
 
 static int
 internal_function
 parse_dollars (char **word, size_t *word_length, size_t *max_length,
 	       const char *words, size_t *offset, int flags,
-	       wordexp_t *pwordexp, const char *ifs, const char *ifs_white,
+	       wordexp_t *pwordexp, struct ifss *ifsdata,
 	       int quoted)
 {
   /* We are poised _at_ "$" */
@@ -2097,7 +2122,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
 
       (*offset) += 2;
       return parse_comm (word, word_length, max_length, words, offset, flags,
-			 quoted? NULL : pwordexp, ifs, ifs_white);
+			 quoted? NULL : pwordexp, ifsdata);
 
     case '[':
       (*offset) += 2;
@@ -2109,7 +2134,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
     default:
       ++(*offset);	/* parse_param needs to know if "{" is there */
       return parse_param (word, word_length, max_length, words, offset, flags,
-			   pwordexp, ifs, ifs_white, quoted);
+			   pwordexp, ifsdata, quoted);
     }
 }
 
@@ -2117,7 +2142,7 @@ static int
 internal_function
 parse_backtick (char **word, size_t *word_length, size_t *max_length,
 		const char *words, size_t *offset, int flags,
-		wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+		wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after "`" */
   int error;
@@ -2133,7 +2158,7 @@ parse_backtick (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  /* Go -- give the script to the shell */
 	  error = exec_comm (comm, word, word_length, max_length, flags,
-			     pwordexp, ifs, ifs_white);
+			     pwordexp, ifsdata);
 	  free (comm);
 	  return error;
 
@@ -2181,7 +2206,7 @@ static int
 internal_function
 parse_dquote (char **word, size_t *word_length, size_t *max_length,
 	      const char *words, size_t *offset, int flags,
-	      wordexp_t *pwordexp, const char * ifs, const char * ifs_white)
+	      wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after a double-quote */
   int error;
@@ -2195,7 +2220,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
 
 	case '$':
 	  error = parse_dollars (word, word_length, max_length, words, offset,
-				 flags, pwordexp, ifs, ifs_white, 1);
+				 flags, pwordexp, ifsdata, 1);
 	  /* The ``1'' here is to tell parse_dollars not to
 	   * split the fields.  It may need to, however ("$@").
 	   */
@@ -2207,7 +2232,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  ++(*offset);
 	  error = parse_backtick (word, word_length, max_length, words,
-				  offset, flags, NULL, NULL, NULL);
+				  offset, flags, NULL, &nulifs);
 	  /* The first NULL here is to tell parse_backtick not to
 	   * split the fields.
 	   */
@@ -2340,6 +2365,13 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       *whch = '\0';
     }
 
+  struct ifss ifsdata;
+  ifsdata.ifs = ifs;
+  ifsdata.ifs_white = ifs_white;
+  make_charset (ifs, ifsdata.ifs_table);
+  make_charset (ifs_white, ifsdata.ifs_white_table);
+
+
   for (words_offset = 0 ; words[words_offset] ; ++words_offset)
     switch (words[words_offset])
       {
@@ -2354,7 +2386,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
 
       case '$':
 	error = parse_dollars (&word, &word_length, &max_length, words,
-			       &words_offset, flags, pwordexp, ifs, ifs_white,
+			       &words_offset, flags, pwordexp, &ifsdata,
 			       0);
 
 	if (error)
@@ -2365,8 +2397,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '`':
 	++words_offset;
 	error = parse_backtick (&word, &word_length, &max_length, words,
-				&words_offset, flags, pwordexp, ifs,
-				ifs_white);
+				&words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
@@ -2376,7 +2407,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '"':
 	++words_offset;
 	error = parse_dquote (&word, &word_length, &max_length, words,
-			      &words_offset, flags, pwordexp, ifs, ifs_white);
+			      &words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
@@ -2422,21 +2453,24 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '[':
       case '?':
 	error = parse_glob (&word, &word_length, &max_length, words,
-			    &words_offset, flags, pwordexp, ifs, ifs_white);
+			    &words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
 
 	break;
 
-      default:
+      default:;
 	/* Is it a word separator? */
-	if (strchr (" \t", words[words_offset]) == NULL)
-	  {
-	    char ch = words[words_offset];
+	char ch = words[words_offset];
 
+        uint32_t set3[8] = {0x200, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* \t */
+	if (!in_charset (ch, set3))
+	  {
 	    /* Not a word separator -- but is it a valid word char? */
-	    if (strchr ("\n|&;<>(){}", ch))
+            uint32_t set4[8] = {0x400, 0x58000340, 0x0, 0x38000000, 0x0, 0x0, 
+                                0x0, 0x0}; /* \n|&;<>(){} */
+	    if (in_charset (ch, set4))
 	      {
 		/* Fail */
 		error = WRDE_BADCHAR;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]