This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [RFC] Use custom hash function with bcache

From: Doug Evans <dje at google dot com>
To: sami wagiaalla <swagiaal at redhat dot com>
Cc: gdb-patches at sourceware dot org
Date: Mon, 16 Aug 2010 11:28:53 -0700
Subject: Re: [RFC] Use custom hash function with bcache
References: <4C6946E1.6000709@redhat.com>
On Mon, Aug 16, 2010 at 7:10 AM, sami wagiaalla <swagiaal@redhat.com> wrote:
> This patch enables the use of custom hash and comparison functions when
> adding elements to a bcache. The patch also introduces hash and comparison
> functions which take into consideration only the relevant properties of the
> psymbol.
>
> Tested by running the test suit on F13 with gcc 4.4.4 on x8664, no
> regressions.
>
> also used 'main print statistics' to ensure that the bcache object count and
> unique object count are as expected with a small test case.
>
> Sami
>

Hi.  Comments inline with the patch.


2010-08-13  Sami Wagiaalla  <swagiaal@redhat.com>

	* psymtab.c (psymbol_hash): New function.
	(psymbol_compare): New function.
	(add_psymbol_to_bcache): pass psymbol_hash and psymbol_compare
	to bcache_full.
	* bcache.c (hash_continue): New.
	(hash): Use hash_continue.
	(bcache): Now takes hash_function, compare_function arguments.
	(bcache_full): Ditto.
	* bcache.c (bcache): Update prototype.
	(bcache_full): Ditto.
	* macrotab.c (macro_bcache): Updated.
	* symfile.c (allocate_symtab): Updated.

diff --git a/gdb/bcache.c b/gdb/bcache.c
index 7d9180c..bef2596 100644
--- a/gdb/bcache.c
+++ b/gdb/bcache.c
@@ -98,12 +98,19 @@ struct bcache
 unsigned long
 hash(const void *addr, int length)
 {
+  return hash_continue (addr, length, 0);
+}
+
+/* Continue the calculation of the hash H at the given address.  */
+
+unsigned long
+hash_continue (const void *addr, int length, unsigned long h)
+{
   const unsigned char *k, *e;
-  unsigned long h;

   k = (const unsigned char *)addr;
   e = k+length;
-  for (h=0; k< e;++k)
+  for (; k< e;++k)
     {
       h *=16777619;
       h ^= *k;
@@ -194,21 +201,34 @@ expand_hash_table (struct bcache *bcache)

 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
    never seen those bytes before, add a copy of them to BCACHE.  In
-   either case, return a pointer to BCACHE's copy of that string.  */
+   either case, return a pointer to BCACHE's copy of that string.
+   If HASH_FUNCTION and COMPARE_FUNCTION are not NULL they will be
+   used for hash calculation and comparing table elements respectively.
+   Otherwise the hash is calculated using the byte string and a
+   simple byte comparison is performed.  */

blank line between a function comment and definition

 const void *
-bcache (const void *addr, int length, struct bcache *bcache)
+bcache (const void *addr, int length, struct bcache *bcache,
+        unsigned long (*hash_function)(const void*),
+        int (*compare_function)(const void*, const void*))
 {
-  return bcache_full (addr, length, bcache, NULL);
+  return bcache_full (addr, length, bcache, NULL, hash_function,
+                      compare_function);
 }

I think it would be better to attach the hash and compare functions to
the bcache object.
E.g. add hash_function, compare_function parameters to bcache_xmalloc.

 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
    never seen those bytes before, add a copy of them to BCACHE.  In
    either case, return a pointer to BCACHE's copy of that string.  If
    optional ADDED is not NULL, return 1 in case of new entry or 0 if
-   returning an old entry.  */
+   returning an old entry.
+   If HASH_FUNCTION and COMPARE_FUNCTION are not NULL they will be
+   used for hash calculation and comparing table elements respectively.
+   Otherwise the hash is calculated using the byte string and a
+   simple byte comparison is performed.  */

 const void *
-bcache_full (const void *addr, int length, struct bcache *bcache, int *added)
+bcache_full (const void *addr, int length, struct bcache *bcache, int *added,
+             unsigned long (*hash_function)(const void*),
+             int (*compare_function)(const void*, const void*))
 {
   unsigned long full_hash;
   unsigned short half_hash;
@@ -235,7 +255,11 @@ bcache_full (const void *addr, int length, struct
bcache *bcache, int *added)
   bcache->total_count++;
   bcache->total_size += length;

-  full_hash = hash (addr, length);
+  if (hash_function == NULL)
+    full_hash = hash (addr, length);
+  else
+    full_hash = hash_function (addr);

[for reference sake]
Once hash_function is in struct bcache I'd store a pointer to the
default hasher, instead of having NULL mean "use default hasher".
[see below for compare_function - I wrote that part first]

+
   half_hash = (full_hash >> 16);
   hash_index = full_hash % bcache->num_buckets;

@@ -246,9 +270,18 @@ bcache_full (const void *addr, int length, struct
bcache *bcache, int *added)
     {
       if (s->half_hash == half_hash)
 	{
-	  if (s->length == length
-	      && ! memcmp (&s->d.data, addr, length))
-	    return &s->d.data;
+	  if (s->length == length)
+	    {
+	      int equal = 0;
+
+	      if (compare_function == NULL)
+		equal = (memcmp (&s->d.data, addr, length) == 0);
+	      else
+		equal = compare_function (&s->d.data, addr);

[for reference sake]
Once compare_function lives in struct bcache, I have a slight
preference for not having compare_function == NULL mean "use memcmp".
Instead store a pointer to memcmp (or wrapper) in the field (and then
you don't have to test for NULL above).  It would mean that all
compare_functions would get a length parameter that they may not use
(except perhaps in an assert that the value is the expected size), but
I think that's ok.
A NULL value for compare_function passed to bcache_xmalloc could mean
"use default" though.

+
+	      if (equal)
+		return &s->d.data;
+	    }
 	  else
 	    bcache->half_hash_miss_count++;
 	}
diff --git a/gdb/bcache.h b/gdb/bcache.h
index da69a2d..799c744 100644
--- a/gdb/bcache.h
+++ b/gdb/bcache.h
@@ -146,13 +146,17 @@ struct bcache;
    Since the cached value is ment to be read-only, return a const
    buffer.  */
 extern const void *bcache (const void *addr, int length,
-			   struct bcache *bcache);
+			   struct bcache *bcache,
+			   unsigned long (*hash_function)(const void*),
+			   int (*compare_function)(const void*, const void*));

 /* Like bcache, but if ADDED is not NULL, set *ADDED to true if the
    bytes were newly added to the cache, or to false if the bytes were
    found in the cache.  */
 extern const void *bcache_full (const void *addr, int length,
-				struct bcache *bcache, int *added);
+				struct bcache *bcache, int *added,
+				unsigned long (*hash_function)(const void*),
+				int (*compare_function)(const void*, const void*));

 /* Free all the storage used by BCACHE.  */
 extern void bcache_xfree (struct bcache *bcache);
@@ -169,5 +173,6 @@ extern int bcache_memory_used (struct bcache *bcache);

 /* The hash function */
 extern unsigned long hash(const void *addr, int length);
-
+extern unsigned long hash_continue (const void *addr, int length,
+                                    unsigned long h);

blank line

 #endif /* BCACHE_H */
diff --git a/gdb/macrotab.c b/gdb/macrotab.c
index 93651ab..eb3eef4 100644
--- a/gdb/macrotab.c
+++ b/gdb/macrotab.c
@@ -109,7 +109,7 @@ static const void *
 macro_bcache (struct macro_table *t, const void *addr, int len)
 {
   if (t->bcache)
-    return bcache (addr, len, t->bcache);
+    return bcache (addr, len, t->bcache, NULL, NULL);
   else
     {
       void *copy = xmalloc (len);
diff --git a/gdb/psymtab.c b/gdb/psymtab.c
index bc47681..f4cdebd 100644
--- a/gdb/psymtab.c
+++ b/gdb/psymtab.c
@@ -1270,6 +1270,47 @@ start_psymtab_common (struct objfile *objfile,
   return (psymtab);
 }

+/* Calculate a hash code for the given partial symbol.  The hash is
+   calculated using the symbol's value, language, domain, class
+   and name. These are the values which are set by
+   add_psymbol_to_bcache.  */
+
+static unsigned long
+psymbol_hash (const void *addr)
+{
+  unsigned long h = 0;
+  struct partial_symbol *psymbol = (struct partial_symbol *) addr;
+  unsigned int lang = psymbol->ginfo.language;
+  unsigned int domain = PSYMBOL_DOMAIN (psymbol);
+  unsigned int class = PSYMBOL_CLASS (psymbol);
+
+  h = hash_continue (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), 0);

s/0/h/

+  h = hash_continue (&lang, sizeof (unsigned int), h);
+  h = hash_continue (&domain, sizeof (unsigned int), h);
+  h = hash_continue (&class, sizeof (unsigned int), h);
+  h = hash_continue (psymbol->ginfo.name, strlen (psymbol->ginfo.name), h);
+
+  return h;
+}
+
+/* Returns true if the symbol at addr1 equals the symbol at addr2.
+   For the comparison this function uses a symbols value,
+   language, domain, class and name.  */
+
+static int
+psymbol_compare (const void *addr1, const void *addr2)
+{
+  struct partial_symbol *sym1 = (struct partial_symbol *) addr1;
+  struct partial_symbol *sym2 = (struct partial_symbol *) addr2;
+
+  return (memcmp (&sym1->ginfo.value, &sym1->ginfo.value,
+                  sizeof (sym1->ginfo.value)) == 0
+	  && sym1->ginfo.language == sym2->ginfo.language
+          && PSYMBOL_DOMAIN (sym1) == PSYMBOL_DOMAIN (sym2)
+          && PSYMBOL_CLASS (sym1) == PSYMBOL_CLASS (sym2)
+          && strcmp (sym1->ginfo.name, sym2->ginfo.name) == 0);
+}
+
 /* Helper function, initialises partial symbol structure and stashes
    it into objfile's bcache.  Note that our caching mechanism will
    use all fields of struct partial_symbol to determine hash value of the
@@ -1312,7 +1353,8 @@ add_psymbol_to_bcache (char *name, int
namelength, int copy_name,

   /* Stash the partial symbol away in the cache */
   return bcache_full (&psymbol, sizeof (struct partial_symbol),
-		      objfile->psymbol_cache, added);
+		      objfile->psymbol_cache, added, psymbol_hash,
+		      psymbol_compare);
 }

 /* Helper function, adds partial symbol to the given partial symbol
diff --git a/gdb/symfile.c b/gdb/symfile.c
index 2cb6b7f..5e7b7c2 100644
--- a/gdb/symfile.c
+++ b/gdb/symfile.c
@@ -2729,7 +2729,7 @@ allocate_symtab (char *filename, struct objfile *objfile)
     obstack_alloc (&objfile->objfile_obstack, sizeof (struct symtab));
   memset (symtab, 0, sizeof (*symtab));
   symtab->filename = (char *) bcache (filename, strlen (filename) + 1,
-				      objfile->filename_cache);
+				      objfile->filename_cache, NULL, NULL);
   symtab->fullname = NULL;
   symtab->language = deduce_language_from_filename (filename);
   symtab->debugformat = "unknown";
Follow-Ups:
- Re: [RFC] Use custom hash function with bcache
  - From: Doug Evans
References:
- [RFC] Use custom hash function with bcache
  - From: sami wagiaalla
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]