From 593f09eb32f8bd0adab9a7eac5f9e97733108e5e Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Thu, 31 Oct 2013 13:45:58 -0400 Subject: [PATCH] refactor levenshtein suggesting In anticipation for a wider use of levenshtein(), we factor out the common part into a new function levenshtein_suggest(). We then change suggest_functions() to use levenshtein_suggest(). --- tapsets.cxx | 22 +--------------------- util.cxx | 34 +++++++++++++++++++++++++++++++++- util.h | 9 +++++++++ 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/tapsets.cxx b/tapsets.cxx index 0c083057a..3e7549ac6 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -6802,27 +6802,7 @@ string dwarf_builder::suggest_functions(systemtap_session& sess, if (funcs.empty()) return ""; - // Calculate Levenshtein distance for each symbol - - // Sensitivity parameters (open for tweaking) - #define MAXFUNCS 5 // maximum number of funcs to suggest - - multimap scores; - for (set::iterator it=funcs.begin(); it!=funcs.end(); ++it) - { - unsigned score = levenshtein(func, *it); - scores.insert(make_pair(score, *it)); - } - - // Print out the top MAXFUNCS funcs - string suggestions; unsigned i = 0; - for (multimap::iterator it = scores.begin(); - it != scores.end() && i < MAXFUNCS; ++it, i++) - suggestions += it->second + ", "; - if (!suggestions.empty()) - suggestions.erase(suggestions.size()-2); - - return suggestions; + return levenshtein_suggest(func, funcs, 5); // print top 5 funcs only } void diff --git a/util.cxx b/util.cxx index e84a710fa..d1a00d19c 100644 --- a/util.cxx +++ b/util.cxx @@ -1103,7 +1103,8 @@ get_self_path() // parameter which would abort the operation if we know the final // distance will be larger than the maximum. This may entail maintaining // another data structure, and thus the cost might outweigh the benefit -unsigned levenshtein(const string& a, const string& b) +unsigned +levenshtein(const string& a, const string& b) { Array2D d(a.size()+1, b.size()+1); @@ -1129,6 +1130,37 @@ unsigned levenshtein(const string& a, const string& b) return d(d.width-1, d.height-1); } +// Returns comma-separated list of set elements closest to the target string. +// Print a maximum amount of 'max' elements, with a maximum levenshtein score +// of 'threshold'. +string +levenshtein_suggest(const string& target, // string to match against + const set& elems, // elements to suggest from + unsigned max, // max elements to print + unsigned threshold) // max leven score to print +{ + // calculate leven score for each elem and put in map + multimap scores; + for (set::const_iterator it = elems.begin(); + it != elems.end(); ++it) + { + unsigned score = levenshtein(target, *it); + if (score <= threshold) + scores.insert(make_pair(score, *it)); + } + + string suggestions; + + // Print out the top 'max' elements + multimap::iterator it = scores.begin(); + for (unsigned i = 0; it != scores.end() && i < max; ++it, i++) + suggestions += it->second + ", "; + if (!suggestions.empty()) + suggestions.erase(suggestions.size()-2); + + return suggestions; +} + #ifndef HAVE_PPOLL // This is a poor-man's ppoll, only used carefully by readers that need to be // interruptible, like remote::run and mutator::run. It does not provide the diff --git a/util.h b/util.h index 17696e8bb..534b475be 100644 --- a/util.h +++ b/util.h @@ -15,6 +15,7 @@ #include #include #include +#include extern "C" { #if ENABLE_NLS @@ -328,6 +329,14 @@ class Array2D // String sorter using the Levenshtein algorithm unsigned levenshtein(const std::string& a, const std::string& b); +// Returns comma-separated list of set elements closest to the target string. +// Print a maximum amount of 'max' elements, with a maximum levenshtein score +// of 'threshold'. +std::string levenshtein_suggest(const std::string& target, + const std::set& elems, + unsigned max = std::numeric_limits::max(), + unsigned threshold = std::numeric_limits::max()); + #ifndef HAVE_PPOLL // This is a poor-man's ppoll; see the implementation for more details... int ppoll(struct pollfd *fds, nfds_t nfds, -- 2.43.5