This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
Re: [RFC] Use custom hash function with bcache
- From: Doug Evans <dje at google dot com>
- To: sami wagiaalla <swagiaal at redhat dot com>
- Cc: gdb-patches at sourceware dot org
- Date: Mon, 16 Aug 2010 11:28:53 -0700
- Subject: Re: [RFC] Use custom hash function with bcache
- References: <4C6946E1.6000709@redhat.com>
On Mon, Aug 16, 2010 at 7:10 AM, sami wagiaalla <swagiaal@redhat.com> wrote:
> This patch enables the use of custom hash and comparison functions when
> adding elements to a bcache. The patch also introduces hash and comparison
> functions which take into consideration only the relevant properties of the
> psymbol.
>
> Tested by running the test suit on F13 with gcc 4.4.4 on x8664, no
> regressions.
>
> also used 'main print statistics' to ensure that the bcache object count and
> unique object count are as expected with a small test case.
>
> Sami
>
Hi. Comments inline with the patch.
2010-08-13 Sami Wagiaalla <swagiaal@redhat.com>
* psymtab.c (psymbol_hash): New function.
(psymbol_compare): New function.
(add_psymbol_to_bcache): pass psymbol_hash and psymbol_compare
to bcache_full.
* bcache.c (hash_continue): New.
(hash): Use hash_continue.
(bcache): Now takes hash_function, compare_function arguments.
(bcache_full): Ditto.
* bcache.c (bcache): Update prototype.
(bcache_full): Ditto.
* macrotab.c (macro_bcache): Updated.
* symfile.c (allocate_symtab): Updated.
diff --git a/gdb/bcache.c b/gdb/bcache.c
index 7d9180c..bef2596 100644
--- a/gdb/bcache.c
+++ b/gdb/bcache.c
@@ -98,12 +98,19 @@ struct bcache
unsigned long
hash(const void *addr, int length)
{
+ return hash_continue (addr, length, 0);
+}
+
+/* Continue the calculation of the hash H at the given address. */
+
+unsigned long
+hash_continue (const void *addr, int length, unsigned long h)
+{
const unsigned char *k, *e;
- unsigned long h;
k = (const unsigned char *)addr;
e = k+length;
- for (h=0; k< e;++k)
+ for (; k< e;++k)
{
h *=16777619;
h ^= *k;
@@ -194,21 +201,34 @@ expand_hash_table (struct bcache *bcache)
/* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has
never seen those bytes before, add a copy of them to BCACHE. In
- either case, return a pointer to BCACHE's copy of that string. */
+ either case, return a pointer to BCACHE's copy of that string.
+ If HASH_FUNCTION and COMPARE_FUNCTION are not NULL they will be
+ used for hash calculation and comparing table elements respectively.
+ Otherwise the hash is calculated using the byte string and a
+ simple byte comparison is performed. */
blank line between a function comment and definition
const void *
-bcache (const void *addr, int length, struct bcache *bcache)
+bcache (const void *addr, int length, struct bcache *bcache,
+ unsigned long (*hash_function)(const void*),
+ int (*compare_function)(const void*, const void*))
{
- return bcache_full (addr, length, bcache, NULL);
+ return bcache_full (addr, length, bcache, NULL, hash_function,
+ compare_function);
}
I think it would be better to attach the hash and compare functions to
the bcache object.
E.g. add hash_function, compare_function parameters to bcache_xmalloc.
/* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has
never seen those bytes before, add a copy of them to BCACHE. In
either case, return a pointer to BCACHE's copy of that string. If
optional ADDED is not NULL, return 1 in case of new entry or 0 if
- returning an old entry. */
+ returning an old entry.
+ If HASH_FUNCTION and COMPARE_FUNCTION are not NULL they will be
+ used for hash calculation and comparing table elements respectively.
+ Otherwise the hash is calculated using the byte string and a
+ simple byte comparison is performed. */
const void *
-bcache_full (const void *addr, int length, struct bcache *bcache, int *added)
+bcache_full (const void *addr, int length, struct bcache *bcache, int *added,
+ unsigned long (*hash_function)(const void*),
+ int (*compare_function)(const void*, const void*))
{
unsigned long full_hash;
unsigned short half_hash;
@@ -235,7 +255,11 @@ bcache_full (const void *addr, int length, struct
bcache *bcache, int *added)
bcache->total_count++;
bcache->total_size += length;
- full_hash = hash (addr, length);
+ if (hash_function == NULL)
+ full_hash = hash (addr, length);
+ else
+ full_hash = hash_function (addr);
[for reference sake]
Once hash_function is in struct bcache I'd store a pointer to the
default hasher, instead of having NULL mean "use default hasher".
[see below for compare_function - I wrote that part first]
+
half_hash = (full_hash >> 16);
hash_index = full_hash % bcache->num_buckets;
@@ -246,9 +270,18 @@ bcache_full (const void *addr, int length, struct
bcache *bcache, int *added)
{
if (s->half_hash == half_hash)
{
- if (s->length == length
- && ! memcmp (&s->d.data, addr, length))
- return &s->d.data;
+ if (s->length == length)
+ {
+ int equal = 0;
+
+ if (compare_function == NULL)
+ equal = (memcmp (&s->d.data, addr, length) == 0);
+ else
+ equal = compare_function (&s->d.data, addr);
[for reference sake]
Once compare_function lives in struct bcache, I have a slight
preference for not having compare_function == NULL mean "use memcmp".
Instead store a pointer to memcmp (or wrapper) in the field (and then
you don't have to test for NULL above). It would mean that all
compare_functions would get a length parameter that they may not use
(except perhaps in an assert that the value is the expected size), but
I think that's ok.
A NULL value for compare_function passed to bcache_xmalloc could mean
"use default" though.
+
+ if (equal)
+ return &s->d.data;
+ }
else
bcache->half_hash_miss_count++;
}
diff --git a/gdb/bcache.h b/gdb/bcache.h
index da69a2d..799c744 100644
--- a/gdb/bcache.h
+++ b/gdb/bcache.h
@@ -146,13 +146,17 @@ struct bcache;
Since the cached value is ment to be read-only, return a const
buffer. */
extern const void *bcache (const void *addr, int length,
- struct bcache *bcache);
+ struct bcache *bcache,
+ unsigned long (*hash_function)(const void*),
+ int (*compare_function)(const void*, const void*));
/* Like bcache, but if ADDED is not NULL, set *ADDED to true if the
bytes were newly added to the cache, or to false if the bytes were
found in the cache. */
extern const void *bcache_full (const void *addr, int length,
- struct bcache *bcache, int *added);
+ struct bcache *bcache, int *added,
+ unsigned long (*hash_function)(const void*),
+ int (*compare_function)(const void*, const void*));
/* Free all the storage used by BCACHE. */
extern void bcache_xfree (struct bcache *bcache);
@@ -169,5 +173,6 @@ extern int bcache_memory_used (struct bcache *bcache);
/* The hash function */
extern unsigned long hash(const void *addr, int length);
-
+extern unsigned long hash_continue (const void *addr, int length,
+ unsigned long h);
blank line
#endif /* BCACHE_H */
diff --git a/gdb/macrotab.c b/gdb/macrotab.c
index 93651ab..eb3eef4 100644
--- a/gdb/macrotab.c
+++ b/gdb/macrotab.c
@@ -109,7 +109,7 @@ static const void *
macro_bcache (struct macro_table *t, const void *addr, int len)
{
if (t->bcache)
- return bcache (addr, len, t->bcache);
+ return bcache (addr, len, t->bcache, NULL, NULL);
else
{
void *copy = xmalloc (len);
diff --git a/gdb/psymtab.c b/gdb/psymtab.c
index bc47681..f4cdebd 100644
--- a/gdb/psymtab.c
+++ b/gdb/psymtab.c
@@ -1270,6 +1270,47 @@ start_psymtab_common (struct objfile *objfile,
return (psymtab);
}
+/* Calculate a hash code for the given partial symbol. The hash is
+ calculated using the symbol's value, language, domain, class
+ and name. These are the values which are set by
+ add_psymbol_to_bcache. */
+
+static unsigned long
+psymbol_hash (const void *addr)
+{
+ unsigned long h = 0;
+ struct partial_symbol *psymbol = (struct partial_symbol *) addr;
+ unsigned int lang = psymbol->ginfo.language;
+ unsigned int domain = PSYMBOL_DOMAIN (psymbol);
+ unsigned int class = PSYMBOL_CLASS (psymbol);
+
+ h = hash_continue (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), 0);
s/0/h/
+ h = hash_continue (&lang, sizeof (unsigned int), h);
+ h = hash_continue (&domain, sizeof (unsigned int), h);
+ h = hash_continue (&class, sizeof (unsigned int), h);
+ h = hash_continue (psymbol->ginfo.name, strlen (psymbol->ginfo.name), h);
+
+ return h;
+}
+
+/* Returns true if the symbol at addr1 equals the symbol at addr2.
+ For the comparison this function uses a symbols value,
+ language, domain, class and name. */
+
+static int
+psymbol_compare (const void *addr1, const void *addr2)
+{
+ struct partial_symbol *sym1 = (struct partial_symbol *) addr1;
+ struct partial_symbol *sym2 = (struct partial_symbol *) addr2;
+
+ return (memcmp (&sym1->ginfo.value, &sym1->ginfo.value,
+ sizeof (sym1->ginfo.value)) == 0
+ && sym1->ginfo.language == sym2->ginfo.language
+ && PSYMBOL_DOMAIN (sym1) == PSYMBOL_DOMAIN (sym2)
+ && PSYMBOL_CLASS (sym1) == PSYMBOL_CLASS (sym2)
+ && strcmp (sym1->ginfo.name, sym2->ginfo.name) == 0);
+}
+
/* Helper function, initialises partial symbol structure and stashes
it into objfile's bcache. Note that our caching mechanism will
use all fields of struct partial_symbol to determine hash value of the
@@ -1312,7 +1353,8 @@ add_psymbol_to_bcache (char *name, int
namelength, int copy_name,
/* Stash the partial symbol away in the cache */
return bcache_full (&psymbol, sizeof (struct partial_symbol),
- objfile->psymbol_cache, added);
+ objfile->psymbol_cache, added, psymbol_hash,
+ psymbol_compare);
}
/* Helper function, adds partial symbol to the given partial symbol
diff --git a/gdb/symfile.c b/gdb/symfile.c
index 2cb6b7f..5e7b7c2 100644
--- a/gdb/symfile.c
+++ b/gdb/symfile.c
@@ -2729,7 +2729,7 @@ allocate_symtab (char *filename, struct objfile *objfile)
obstack_alloc (&objfile->objfile_obstack, sizeof (struct symtab));
memset (symtab, 0, sizeof (*symtab));
symtab->filename = (char *) bcache (filename, strlen (filename) + 1,
- objfile->filename_cache);
+ objfile->filename_cache, NULL, NULL);
symtab->fullname = NULL;
symtab->language = deduce_language_from_filename (filename);
symtab->debugformat = "unknown";