This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 6/*] Generic string function optimization: strcasestr
- From: OndÅej BÃlka <neleai at seznam dot cz>
- To: libc-alpha at sourceware dot org
- Date: Thu, 28 May 2015 19:45:34 +0200
- Subject: [PATCH 6/*] Generic string function optimization: strcasestr
- Authentication-results: sourceware.org; auth=none
- References: <20150527060121 dot GA19105 at domone>
Hi,
Here I made wrong assumption for long time that prevented me to see
optimization in strcasestr. Now I realized that looking for leading pair
would likely work. I though that it would be necessary to construct
table that assigns to each characters list of characters in same
equivalence class.
I don't have to do that if like in strchr will check if its ascii. Then
I could approximate toupper(x) = toupper(c) by expression
(x == toupper (c)) | (x == tolower (c)) | (x > 128)
which is easily evaluable in parallel, then and it with shifted
expression for second character.
I added it now just as simple heuristic, a more buy-or-rent approach
will follow in next patch.
I also needed to remove test/bench-strcasestr as I need locale check for
ascii-compatibility of towlower map.
Comments?
* benchtests/bench-strcasestr.c: Remove simple_strcasestr.
* string/test-strcasestr.c: Likewise.
* string/skeleton.h: Customize cmask parameter with CMASK_PARAM macro
* string/strcasestr.c: Optimize by fast search of leading digraph.
diff --git a/benchtests/bench-strcasestr.c b/benchtests/bench-strcasestr.c
index 33531a4..6c6309f 100644
--- a/benchtests/bench-strcasestr.c
+++ b/benchtests/bench-strcasestr.c
@@ -21,10 +21,6 @@
#include "bench-string.h"
-#define STRCASESTR simple_strcasestr
-#define NO_ALIAS
-#define __strncasecmp strncasecmp
-#include "../string/strcasestr.c"
static char *
@@ -53,7 +49,6 @@ stupid_strcasestr (const char *s1, const char *s2)
typedef char *(*proto_t) (const char *, const char *);
IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
IMPL (strcasestr, 1)
diff --git a/string/skeleton.h b/string/skeleton.h
index 26f2d9f..74d4b9f 100644
--- a/string/skeleton.h
+++ b/string/skeleton.h
@@ -31,9 +31,17 @@
#define EXPRESSION(x,y) EXPRESSION_NOCARRY(x,y)
#endif
+#ifdef CUSTOM_CMASK
+#define CMASK_PARAM_MASK CMASK_PARAM
+#else
+#define CMASK_PARAM int c_in
+#define CMASK_PARAM_MASK unsigned long int cmask
+#endif
+
+
static __always_inline
int
-found_in_long_bytes(char *s, unsigned long int cmask, char **result)
+found_in_long_bytes(char *s, CMASK_PARAM_MASK, char **result)
{
const unsigned long int *lptr = (const unsigned long int *) s;
unsigned long int mask = EXPRESSION(*lptr, cmask);
@@ -46,16 +54,21 @@ found_in_long_bytes(char *s, unsigned long int cmask, char **result)
return 0;
}
+
+
+
static __always_inline
char *
-string_skeleton (const char *s_in, int c_in, char *end)
+string_skeleton (const char *s_in, CMASK_PARAM, char *end)
{
unsigned long int mask;
const unsigned long int *lptr;
char *s = (char *) s_in;
- unsigned char c = (unsigned char) c_in;
char *r;
+#ifndef CUSTOM_CMASK
+ unsigned char c = (unsigned char) c_in;
unsigned long int __attribute__ ((unused)) cmask = c * ones;
+#endif
#if _STRING_ARCH_unaligned
/* We fetch 32 bytes while not crossing page boundary.
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 400fab8..7c984c8 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -57,6 +57,37 @@
#define STRCASESTR __strcasestr
#endif
+#include "string/common.h"
+
+struct cmask
+{
+ unsigned long int l0, u0, l1, u1;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+#define EXPRESSION(x, cmask) (\
+ ( contains_zero ((x >> 8) ^ cmask.l0) \
+ | contains_zero ((x >> 8) ^ cmask.u0) \
+ | (x >> 8)) \
+ | (1UL << (8 * LSIZE - 1)) \
+ & (contains_zero (x ^ cmask.l1) \
+ | contains_zero (x ^ cmask.l1) \
+ | x))
+#define EXPRESSION_NOCARRY(x,cmask) (\
+ ( contains_zero_nocarry ((x >> 8) ^ cmask.l0) \
+ | contains_zero_nocarry ((x >> 8) ^ cmask.u0) \
+ | (x >> 8)) \
+ | (1UL << (8 * LSIZE - 1)) \
+ & (contains_zero_nocarry (x ^ cmask.l1) \
+ | contains_zero_nocarry (x ^ cmask.l1) \
+ | x))
+
+#include "string/skeleton.h"
+
+#include "../locale/localeinfo.h"
+
+
/* Find the first occurrence of NEEDLE in HAYSTACK, using
case-insensitive comparison. This function gives unspecified
@@ -70,6 +101,23 @@ STRCASESTR (const char *haystack_start, const char *needle_start)
size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
+ __locale_t loc = _NL_CURRENT_LOCALE;
+ struct __locale_data *ctype = loc->__locales[LC_CTYPE];
+ int nonascii = ctype->values[_NL_ITEM_INDEX (_NL_CTYPE_NONASCII_CASE)].word;
+
+ unsigned char *n = (unsigned char *) needle;
+ if (!nonascii && haystack[0] != 0 && n[0] != 0 && n[0] < 128
+ && n[1] != 0 && n[1] < 128)
+ {
+ struct cmask cmask;
+ cmask.l0 = tolower (n[0]) * ones;
+ cmask.u0 = toupper (n[0]) * ones;
+ cmask.l1 = tolower (n[1]) * ones;
+ cmask.u1 = toupper (n[1]) * ones;
+ haystack = string_skeleton (haystack + 1, cmask, NULL) - 1;
+ }
+
+
/* Determine length of NEEDLE, and in the process, make sure
HAYSTACK is at least as long (no point processing all of a long
NEEDLE if HAYSTACK is too short). */
diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c
index 489dc84..3c01881 100644
--- a/string/test-strcasestr.c
+++ b/string/test-strcasestr.c
@@ -25,7 +25,6 @@
#define STRCASESTR simple_strcasestr
#define NO_ALIAS
#define __strncasecmp strncasecmp
-#include "strcasestr.c"
static char *
@@ -54,7 +53,6 @@ stupid_strcasestr (const char *s1, const char *s2)
typedef char *(*proto_t) (const char *, const char *);
IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
IMPL (strcasestr, 1)