From 7eae03f8629363ef9743babef28d4b9ed603798b Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Fri, 7 Aug 2015 14:53:03 -0400 Subject: [PATCH] stringtable tuning As guided by "perf stat -r ..." runs, a few microoptimizations on the string table / interned-string facility. --- stringtable.cxx | 66 ++++++++++++++++++++++++++++++++++++++++++++++++- stringtable.h | 15 ++++++++--- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/stringtable.cxx b/stringtable.cxx index 5004b8240..c5f472f49 100644 --- a/stringtable.cxx +++ b/stringtable.cxx @@ -13,11 +13,43 @@ #include #include -typedef unordered_set stringtable_t; using namespace std; using namespace boost; +#if INTERNED_STRING_CUSTOM_HASH +// A custom hash +struct stringtable_hash +{ + size_t operator()(const string& c) const { + const char* b = c.data(); + size_t real_length = c.size(); + const size_t blocksize = 64; // a cache line or two + + // hash the length + size_t hash = real_length; + + // hash the beginning + size_t length = real_length; + if (length > blocksize) + length = blocksize; + while (length-- > 0) + hash = (hash * 131) + *b++; + + // hash the last byte + hash = (hash * 131) + *(c.data() + real_length - 1); + + // XXX: hash the middle / end too? + + return hash; + } +}; +typedef unordered_set stringtable_t; +#else +typedef unordered_set stringtable_t; +#endif + + stringtable_t stringtable; @@ -61,6 +93,38 @@ interned_string& interned_string::operator = (const char* value) return *this; } +#if INTERNED_STRING_FIND_MEMMEM +size_t interned_string::find (const boost::string_ref& f) const +{ + const char *ptr = (const char*) memmem (this->data(), this->size(), + f.data(), f.size()); + if (ptr) + return (ptr - this->data()); + else + return boost::string_ref::npos; +} + +size_t interned_string::find (const std::string& f) const +{ + const char *ptr = (const char*) memmem (this->data(), this->size(), + f.data(), f.size()); + if (ptr) + return (ptr - this->data()); + else + return boost::string_ref::npos; +} + +size_t interned_string::find (const char *f) const +{ + const char *ptr = (const char*) memmem (this->data(), this->size(), + f, strlen(f)); + if (ptr) + return (ptr - this->data()); + else + return boost::string_ref::npos; +} +#endif + // The result of c_str() is basically strdup()'d, in anticipation // that interning may result in string_refs that are not followed by \0, // so we can't just pass back ->data(). The strdup()'d memory is diff --git a/stringtable.h b/stringtable.h index 6359631f8..94771fc6c 100644 --- a/stringtable.h +++ b/stringtable.h @@ -13,6 +13,10 @@ #include #include //header with string_ref +// XXX: experimental tunables +#define INTERNED_STRING_FIND_MEMMEM 1 /* perf stat indicates a very slight benefit */ +#define INTERNED_STRING_CUSTOM_HASH 1 /* maybe an abbreviated hash function for long strings? */ + struct interned_string: public boost::string_ref { @@ -39,7 +43,7 @@ struct interned_string: public boost::string_ref // boost oversights template - size_t find (F f, size_t start_pos) + size_t find (const F& f, size_t start_pos) { size_t x = this->substr(start_pos).find(f); if (x == boost::string_ref::npos) @@ -49,11 +53,16 @@ struct interned_string: public boost::string_ref } template - size_t find (F f) + size_t find (const F& f) const { return boost::string_ref::find(f); } - + +#if INTERNED_STRING_FIND_MEMMEM + size_t find (const boost::string_ref& f) const; + size_t find (const std::string& f) const; + size_t find (const char *f) const; +#endif private: mutable char *_c_str; // last value copied out -- 2.43.5