[PATCH v2] Fix slow and non-deterministic behavior of isspace() and tolower()

Shawn Landden shawn@git.icu
Mon Jun 10 21:30:00 GMT 2019


I was seeing 8% and 6% of CPU time spent in tolower() and isspace(),
respectively, while waiting for a breakpoint on ppc64el.

Also, gdb does not want locale-dependent (non-deterministic) behavior here.

v2: do not clash with C99 names
---
 gdb/utils.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/gdb/utils.c b/gdb/utils.c
index 9686927473..0b68fabe4d 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -2626,10 +2626,29 @@ strcmp_iw (const char *string1, const char *string2)
    user searches for "foo", then strcmp will sort "foo" before "foo$".
    Then lookup_partial_symbol will notice that strcmp_iw("foo$",
    "foo") is false, so it won't proceed to the actual match of
    "foo(int)" with "foo".  */
 
+/* glibc versions of these have non-deterministic locale-dependent behavior,
+   and are very slow, taking 8% and 6% of total CPU time in some use cases.  */
+
+static inline int isspace_inline(int c) /* Nonzero iff C is ' ' or in the range '\t' .. '\t' + 4.  */
+{
+  return c == ' ' || (unsigned)c-'\t' < 5; /* Unsigned subtraction wraps for C < '\t', so one compare bounds both ends.  */
+}
+
+static inline int isupper_inline(int c) /* Nonzero iff C is one of the 26 codes starting at 'A'.  */
+{
+  return (unsigned)c-'A' < 26; /* Unsigned subtraction wraps for C < 'A', so one compare bounds both ends.  */
+}
+
+static inline int tolower_inline(int c) /* Locale-independent tolower: maps 'A'..'Z' to lower case, returns any other C unchanged.  */
+{
+  if (isupper_inline(c)) return c | 32; /* Use the local helper, not libc isupper; in ASCII, OR-ing in 32 turns 'A'..'Z' into 'a'..'z'.  */
+  return c;
+}
+
 int
 strcmp_iw_ordered (const char *string1, const char *string2)
 {
   const char *saved_string1 = string1, *saved_string2 = string2;
   enum case_sensitivity case_pass = case_sensitive_off;
@@ -2641,20 +2660,20 @@ strcmp_iw_ordered (const char *string1, const char *string2)
 	 strings.  */
       char c1 = 'X', c2 = 'X';
 
       while (*string1 != '\0' && *string2 != '\0')
 	{
-	  while (isspace (*string1))
+	  while (isspace_inline (*string1))
 	    string1++;
-	  while (isspace (*string2))
+	  while (isspace_inline (*string2))
 	    string2++;
 
 	  switch (case_pass)
 	  {
 	    case case_sensitive_off:
-	      c1 = tolower ((unsigned char) *string1);
-	      c2 = tolower ((unsigned char) *string2);
+	      c1 = tolower_inline ((unsigned char) *string1);
+	      c2 = tolower_inline ((unsigned char) *string2);
 	      break;
 	    case case_sensitive_on:
 	      c1 = *string1;
 	      c2 = *string2;
 	      break;
-- 
2.20.1



More information about the Gdb-patches mailing list