This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v2] Improve memmem.


On Wed, May 13, 2015 at 09:31:06AM -0700, Paul Eggert wrote:
> OndÅej BÃlka wrote:
> >+  if (needle_len == 1)
> >+    return memchr (haystack, needle[0], haystack_end - haystack);
> >+
> >+  while ((haystack = memchr (haystack, needle[0], haystack_end - haystack)))
> >+    {
> >+      if (haystack[1] == needle[1]
> >+          && (needle_len == 2 || haystack[2] == needle[2]))
> >+        {
> >+          if (needle_len == 2)
> >+            return haystack;
> >+
> >+          if (!memcmp (haystack + 2, needle + 2, needle_len - 2))
> >+            return haystack;
> >+          else
> >+            goto slow_path;
> >+        }
> >+      haystack++;
> >+    }
> >+
> >+  return NULL;
> >+
> >+  slow_path:;
> 
> First, that "haystack[1] == needle[1]" could access past the end of
> the haystack.  Second, can't this be rewritten to avoid that ugly
> goto?  Something like the attached, perhaps.

You wont get illegal acces. Unless I messed up haystack_end calculation.

Here is equivalent without goto. It uses implementation behaviour that
memcmp(x,y,0) doesn't touch anything.

Is this better?

	* string/memmem.c (__memmem): Optimize hot path.

diff --git a/string/memmem.c b/string/memmem.c
index 8a81f65..088a2f1 100644
--- a/string/memmem.c
+++ b/string/memmem.c
@@ -46,6 +46,10 @@ __memmem (const void *haystack_start, size_t haystack_len,
      not an array of 'char' values.  See ISO C 99 section 6.2.6.1.  */
   const unsigned char *haystack = (const unsigned char *) haystack_start;
   const unsigned char *needle = (const unsigned char *) needle_start;
+  const unsigned char *haystack_end = (const unsigned char *)
+                                      haystack_start + haystack_len
+                                      - needle_len + 1;
+
 
   if (needle_len == 0)
     /* The first occurrence of the empty string is deemed to occur at
@@ -57,6 +61,25 @@ __memmem (const void *haystack_start, size_t haystack_len,
   if (__glibc_unlikely (haystack_len < needle_len))
     return NULL;
 
+   if (needle_len == 1)
+     return memchr (haystack, needle[0], haystack_end - haystack);
+
+   for (; ;haystack++) 
+     {
+       haystack = memchr (haystack, needle[0], haystack_end - haystack);
+
+       if (!haystack)
+         return NULL;
+
+       if (haystack[1] == needle[1]
+           && (needle_len == 2 || haystack[2] == needle[2]))
+         {
+           if (!memcmp (haystack + 2, needle + 2, needle_len - 2))
+             return (void *) haystack;
+           break;
+         }
+     }
+ 
   /* Use optimizations in memchr when possible, to reduce the search
      size of haystack using a linear algorithm with a smaller
      coefficient.  However, avoid memchr for long needles, since we


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]