From 924f8d22dbcb8d7809e495f5a8f1545805a83f00 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Fri, 7 Aug 2015 20:19:50 -0400 Subject: [PATCH] stringtable: add diagnostic logging mode With INTERNED_STRING_INSTRUMENT set to a nonzero value, a /tmp/hash.log file is written out to record each string being interned. Looking through that is a good way of tuning the hash function, as well as what staptree/etc. variables are reverted to std::string form. (The hint is that if strings are hardly ever interned in the log, then maybe they are not reused either. But that could instead be because they're being reused exclusively in interned string_ref form, so don't need to be rehashed.) --- stringtable.cxx | 42 ++++++++++++++++++++++++++++++++++++------ stringtable.h | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/stringtable.cxx b/stringtable.cxx index c5f472f49..5e1f44f63 100644 --- a/stringtable.cxx +++ b/stringtable.cxx @@ -12,11 +12,21 @@ #include #include +#include using namespace std; using namespace boost; + +#if INTERNED_STRING_INSTRUMENT +bool whitespace_p (char c) +{ + return isspace(static_cast<unsigned char>(c)); +} +#endif + + #if INTERNED_STRING_CUSTOM_HASH // A custom hash struct stringtable_hash @@ -24,7 +34,7 @@ struct stringtable_hash size_t operator()(const string& c) const { const char* b = c.data(); size_t real_length = c.size(); - const size_t blocksize = 64; // a cache line or two + const size_t blocksize = 32; // a cache line or two // hash the length size_t hash = real_length; @@ -36,11 +46,26 @@ struct stringtable_hash while (length-- > 0) hash = (hash * 131) + *b++; - // hash the last byte - hash = (hash * 131) + *(c.data() + real_length - 1); - - // XXX: hash the middle / end too? 
- + // hash the middle + if (real_length > blocksize * 3) + { + length = blocksize; // more likely not to span a cache line + b = (const char*)c.data() + (real_length/2); + while (length-- > 0) + hash = (hash * 131) + *b++; + } + + // the ends, especially of generated bits, are likely to be } } } + // \n kinds of similar things + +#if INTERNED_STRING_INSTRUMENT + ofstream f ("/tmp/hash.log", ios::app); + string s = c.substr(0,32); + s.erase (remove_if(s.begin(), s.end(), whitespace_p), s.end()); + f << hash << " " << c.length() << " " << s << endl; + f.close(); +#endif + return hash; } }; @@ -51,6 +76,11 @@ typedef unordered_set stringtable_t; stringtable_t stringtable; +// XXX: set a larger initial size? For reference, a +// +// probe kernel.function("*") {} +// +// can intern some 450,000 entries. // Generate a long-lived string_ref for the given input string. In diff --git a/stringtable.h b/stringtable.h index 94771fc6c..2bd23e23b 100644 --- a/stringtable.h +++ b/stringtable.h @@ -16,7 +16,7 @@ // XXX: experimental tunables #define INTERNED_STRING_FIND_MEMMEM 1 /* perf stat indicates a very slight benefit */ #define INTERNED_STRING_CUSTOM_HASH 1 /* maybe an abbreviated hash function for long strings? */ - +#define INTERNED_STRING_INSTRUMENT 0 /* nonzero: append hash, length and a 32-char prefix of each hashed string to /tmp/hash.log ... super super slow */ struct interned_string: public boost::string_ref { -- 2.43.5