This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[commit] Speed up cp_canonicalize_string


Some performance tuning on cp_canonicalize_string:

- Short circuit simple identifiers.
- Do not allocate and free the work buffer every call.
- Do not use strlen unnecessarily.

Also fix a GCC 4.2 warning building test-cp-name-parser.o.

With these fixes calling cp_canonicalize_string on every name in a
large collection of C++ input files is about eight times faster
(we don't do that yet so there is no performance change in current
GDB).  Tested on x86_64-linux and checked in.

-- 
Daniel Jacobowitz
CodeSourcery

2007-10-22  Daniel Jacobowitz  <dan@codesourcery.com>

	* cp-support.c: Include "safe-ctype.h".
	(cp_already_canonical): New function.
	(cp_canonicalize_string): Use it.  Return NULL for already canonical
	strings.
	(mangled_name_to_comp): Update call to cp_demangled_name_to_comp.
	(cp_func_name, remove_params): Likewise.
	(cp_find_first_component_aux): Use ISSPACE.
	* cp-support.h (cp_demangled_name_to_comp): Correct comment.  Remove
	MEMORY_P argument.
	* cp-name-parser.y (ALLOC_CHUNK): Define.
	(struct demangle_info): Add PREV and NEXT.  Increase the size of
	COMPS.
	(d_grab): Convert to a function.
	(allocate_info): Rewrite.
	(cp_demangled_name_to_comp): Remove MEMORY argument.  Do not use
	strlen.  Update call to allocate_info.  Do not free it on failure.
	(main): Update calls to cp_demangled_name_to_comp.
	* Makefile.in (cp-support.o): Update.

Index: cp-support.c
===================================================================
RCS file: /cvs/src/src/gdb/cp-support.c,v
retrieving revision 1.24
diff -u -p -r1.24 cp-support.c
--- cp-support.c	23 Aug 2007 18:08:27 -0000	1.24
+++ cp-support.c	22 Oct 2007 14:27:42 -0000
@@ -19,7 +19,6 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 #include "defs.h"
-#include <ctype.h>
 #include "cp-support.h"
 #include "gdb_string.h"
 #include "demangle.h"
@@ -33,6 +32,8 @@
 #include "complaints.h"
 #include "gdbtypes.h"
 
+#include "safe-ctype.h"
+
 #define d_left(dc) (dc)->u.s_binary.left
 #define d_right(dc) (dc)->u.s_binary.right
 
@@ -68,30 +69,63 @@ struct cmd_list_element *maint_cplus_cmd
 static void maint_cplus_command (char *arg, int from_tty);
 static void first_component_command (char *arg, int from_tty);
 
-/* Return the canonicalized form of STRING, or NULL if STRING can not be
-   parsed.  The return value is allocated via xmalloc.
+/* Return 1 if STRING is clearly already in canonical form.  This
+   function is conservative; things which it does not recognize are
+   assumed to be non-canonical, and the parser will sort them out
+   afterwards.  This speeds up the critical path for alphanumeric
+   identifiers.  */
+
+static int
+cp_already_canonical (const char *string)
+{
+  /* Identifier start character [a-zA-Z_].  */
+  if (!ISIDST (string[0]))
+    return 0;
+
+  /* These are the only two identifiers which canonicalize to other
+     than themselves or an error: unsigned -> unsigned int and
+     signed -> int.  */
+  if (string[0] == 'u' && strcmp (&string[1], "nsigned") == 0)
+    return 0;
+  else if (string[0] == 's' && strcmp (&string[1], "igned") == 0)
+    return 0;
+
+  /* Identifier character [a-zA-Z0-9_].  */
+  while (ISIDNUM (string[1]))
+    string++;
+
+  if (string[1] == '\0')
+    return 1;
+  else
+    return 0;
+}
 
-   drow/2005-03-07: Should we also return NULL for things that trivially do
-   not require any change?  e.g. simple identifiers.  This could be more
-   efficient.  */
+/* Parse STRING and convert it to canonical form.  If parsing fails,
+   or if STRING is already canonical, return NULL.  Otherwise return
+   the canonical form.  The return value is allocated via xmalloc.  */
 
 char *
 cp_canonicalize_string (const char *string)
 {
-  void *storage;
   struct demangle_component *ret_comp;
+  unsigned int estimated_len;
   char *ret;
-  int len = strlen (string);
 
-  len = len + len / 8;
+  if (cp_already_canonical (string))
+    return NULL;
 
-  ret_comp = cp_demangled_name_to_comp (string, &storage, NULL);
+  ret_comp = cp_demangled_name_to_comp (string, NULL);
   if (ret_comp == NULL)
     return NULL;
 
-  ret = cp_comp_to_string (ret_comp, len);
+  estimated_len = strlen (string) * 2;
+  ret = cp_comp_to_string (ret_comp, estimated_len);
 
-  xfree (storage);
+  if (strcmp (string, ret) == 0)
+    {
+      xfree (ret);
+      return NULL;
+    }
 
   return ret;
 }
@@ -128,7 +162,7 @@ mangled_name_to_comp (const char *mangle
    return NULL;
   
   /* If we could demangle the name, parse it to build the component tree.  */
-  ret = cp_demangled_name_to_comp (demangled_name, memory, NULL);
+  ret = cp_demangled_name_to_comp (demangled_name, NULL);
 
   if (ret == NULL)
     {
@@ -321,12 +355,11 @@ method_name_from_physname (const char *p
 char *
 cp_func_name (const char *full_name)
 {
-  void *storage;
   char *ret;
   struct demangle_component *ret_comp;
   int done;
 
-  ret_comp = cp_demangled_name_to_comp (full_name, &storage, NULL);
+  ret_comp = cp_demangled_name_to_comp (full_name, NULL);
   if (!ret_comp)
     return NULL;
 
@@ -336,7 +369,6 @@ cp_func_name (const char *full_name)
   if (ret_comp != NULL)
     ret = cp_comp_to_string (ret_comp, 10);
 
-  xfree (storage);
   return ret;
 }
 
@@ -349,13 +381,12 @@ remove_params (const char *demangled_nam
 {
   int done = 0;
   struct demangle_component *ret_comp;
-  void *storage;
   char *ret = NULL;
 
   if (demangled_name == NULL)
     return NULL;
 
-  ret_comp = cp_demangled_name_to_comp (demangled_name, &storage, NULL);
+  ret_comp = cp_demangled_name_to_comp (demangled_name, NULL);
   if (ret_comp == NULL)
     return NULL;
 
@@ -381,7 +412,6 @@ remove_params (const char *demangled_nam
   if (ret_comp->type == DEMANGLE_COMPONENT_TYPED_NAME)
     ret = cp_comp_to_string (d_left (ret_comp), 10);
 
-  xfree (storage);
   return ret;
 }
 
@@ -511,7 +541,7 @@ cp_find_first_component_aux (const char 
 	      && strncmp (name + index, "operator", LENGTH_OF_OPERATOR) == 0)
 	    {
 	      index += LENGTH_OF_OPERATOR;
-	      while (isspace(name[index]))
+	      while (ISSPACE(name[index]))
 		++index;
 	      switch (name[index])
 		{
Index: cp-support.h
===================================================================
RCS file: /cvs/src/src/gdb/cp-support.h,v
retrieving revision 1.19
diff -u -p -r1.19 cp-support.h
--- cp-support.h	23 Aug 2007 18:08:28 -0000	1.19
+++ cp-support.h	22 Oct 2007 14:27:42 -0000
@@ -114,10 +114,10 @@ extern void cp_check_possible_namespace_
 
 struct type *cp_lookup_transparent_type (const char *name);
 
-/* Functions from cp-names.y.  */
+/* Functions from cp-name-parser.y.  */
 
 extern struct demangle_component *cp_demangled_name_to_comp
-  (const char *demangled_name, void **memory_p, const char **errmsg);
+  (const char *demangled_name, const char **errmsg);
 
 extern char *cp_comp_to_string (struct demangle_component *result,
 				int estimated_len);
Index: cp-name-parser.y
===================================================================
RCS file: /cvs/src/src/gdb/cp-name-parser.y,v
retrieving revision 1.5
diff -u -p -r1.5 cp-name-parser.y
--- cp-name-parser.y	9 Jan 2007 17:58:50 -0000	1.5
+++ cp-name-parser.y	22 Oct 2007 14:27:43 -0000
@@ -54,13 +54,38 @@ static const char *lexptr, *prev_lexptr,
 /* The components built by the parser are allocated ahead of time,
    and cached in this structure.  */
 
+#define ALLOC_CHUNK 100
+
 struct demangle_info {
   int used;
-  struct demangle_component comps[1];
+  struct demangle_info *prev, *next;
+  struct demangle_component comps[ALLOC_CHUNK];
 };
 
 static struct demangle_info *demangle_info;
-#define d_grab() (&demangle_info->comps[demangle_info->used++])
+
+static struct demangle_component *
+d_grab (void)
+{
+  struct demangle_info *more;
+
+  if (demangle_info->used >= ALLOC_CHUNK)
+    {
+      if (demangle_info->next == NULL)
+	{
+	  more = malloc (sizeof (struct demangle_info));
+	  more->prev = demangle_info;
+	  more->next = NULL;
+	  demangle_info->next = more;
+	}
+      else
+	more = demangle_info->next;
+
+      more->used = 0;
+      demangle_info = more;
+    }
+  return &demangle_info->comps[demangle_info->used++];
+}
 
 /* The parse tree created by the parser is stored here after a successful
    parse.  */
@@ -1923,19 +1950,24 @@ yyerror (char *msg)
   global_errmsg = msg ? msg : "parse error";
 }
 
-/* Allocate all the components we'll need to build a tree.  We generally
-   allocate too many components, but the extra memory usage doesn't hurt
-   because the trees are temporary.  If we start keeping the trees for
-   a longer lifetime we'll need to be cleverer.  */
-static struct demangle_info *
-allocate_info (int comps)
-{
-  struct demangle_info *ret;
-
-  ret = malloc (sizeof (struct demangle_info)
-		+ sizeof (struct demangle_component) * (comps - 1));
-  ret->used = 0;
-  return ret;
+/* Allocate a chunk of the components we'll need to build a tree.  We
+   generally allocate too many components, but the extra memory usage
+   doesn't hurt because the trees are temporary and the storage is
+   reused.  More may be allocated later, by d_grab.  */
+static void
+allocate_info (void)
+{
+  if (demangle_info == NULL)
+    {
+      demangle_info = malloc (sizeof (struct demangle_info));
+      demangle_info->prev = NULL;
+      demangle_info->next = NULL;
+    }
+  else
+    while (demangle_info->prev)
+      demangle_info = demangle_info->prev;
+
+  demangle_info->used = 0;
 }
 
 /* Convert RESULT to a string.  The return value is allocated
@@ -1975,26 +2007,23 @@ cp_comp_to_string (struct demangle_compo
   return (buf);
 }
 
-/* Convert a demangled name to a demangle_component tree.  *MEMORY is set to the
-   block of used memory that should be freed when finished with the
-   tree.  On error, NULL is returned, and an error message will be
-   set in *ERRMSG (which does not need to be freed).  */
+/* Convert a demangled name to a demangle_component tree.  On success,
+   the root of the new tree is returned; it is valid until the next
+   call to this function and should not be freed.  On error, NULL is
+   returned, and an error message will be set in *ERRMSG (which does
+   not need to be freed).  */
 
 struct demangle_component *
-cp_demangled_name_to_comp (const char *demangled_name, void **memory,
-			   const char **errmsg)
+cp_demangled_name_to_comp (const char *demangled_name, const char **errmsg)
 {
   static char errbuf[60];
   struct demangle_component *result;
 
-  int len = strlen (demangled_name);
-
-  len = len + len / 8;
   prev_lexptr = lexptr = demangled_name;
   error_lexptr = NULL;
   global_errmsg = NULL;
 
-  demangle_info = allocate_info (len);
+  allocate_info ();
 
   if (yyparse ())
     {
@@ -2005,11 +2034,9 @@ cp_demangled_name_to_comp (const char *d
 	  strcat (errbuf, "'");
 	  *errmsg = errbuf;
 	}
-      free (demangle_info);
       return NULL;
     }
 
-  *memory = demangle_info;
   result = global_result;
   global_result = NULL;
 
@@ -2063,11 +2090,10 @@ trim_chars (char *lexptr, char **extra_c
 int
 main (int argc, char **argv)
 {
-  char *str2, *extra_chars, c;
+  char *str2, *extra_chars = "", c;
   char buf[65536];
   int arg;
   const char *errmsg;
-  void *memory;
   struct demangle_component *result;
 
   arg = 1;
@@ -2094,7 +2120,7 @@ main (int argc, char **argv)
 	      printf ("%s\n", buf);
 	    continue;
 	  }
-	result = cp_demangled_name_to_comp (str2, &memory, &errmsg);
+	result = cp_demangled_name_to_comp (str2, &errmsg);
 	if (result == NULL)
 	  {
 	    fputs (errmsg, stderr);
@@ -2103,7 +2129,6 @@ main (int argc, char **argv)
 	  }
 
 	cp_print (result);
-	free (memory);
 
 	free (str2);
 	if (c)
@@ -2115,7 +2140,7 @@ main (int argc, char **argv)
       }
   else
     {
-      result = cp_demangled_name_to_comp (argv[arg], &memory, &errmsg);
+      result = cp_demangled_name_to_comp (argv[arg], &errmsg);
       if (result == NULL)
 	{
 	  fputs (errmsg, stderr);
@@ -2124,7 +2149,6 @@ main (int argc, char **argv)
 	}
       cp_print (result);
       putchar ('\n');
-      free (memory);
     }
   return 0;
 }
Index: Makefile.in
===================================================================
RCS file: /cvs/src/src/gdb/Makefile.in,v
retrieving revision 1.945
diff -u -p -r1.945 Makefile.in
--- Makefile.in	15 Oct 2007 19:45:30 -0000	1.945
+++ Makefile.in	22 Oct 2007 14:27:43 -0000
@@ -1931,7 +1931,7 @@ cp-namespace.o: cp-namespace.c $(defs_h)
 cp-support.o: cp-support.c $(defs_h) $(cp_support_h) $(gdb_string_h) \
 	$(demangle_h) $(gdb_assert_h) $(gdbcmd_h) $(dictionary_h) \
 	$(objfiles_h) $(frame_h) $(symtab_h) $(block_h) $(complaints_h) \
-	$(gdbtypes_h)
+	$(gdbtypes_h) $(safe_ctype_h)
 cp-valprint.o: cp-valprint.c $(defs_h) $(gdb_obstack_h) $(symtab_h) \
 	$(gdbtypes_h) $(expression_h) $(value_h) $(command_h) $(gdbcmd_h) \
 	$(demangle_h) $(annotate_h) $(gdb_string_h) $(c_lang_h) $(target_h) \


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]