This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
Re: [PATCH] DT_GNU_HASH: ~ 50% dynamic linking improvement
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Paul Eggert <eggert at CS dot UCLA dot EDU>
- Cc: binutils at sources dot redhat dot com, libc-alpha at sources dot redhat dot com, Michael Meeks <michael dot meeks at novell dot com>
- Date: Wed, 28 Jun 2006 22:24:58 +0200
- Subject: Re: [PATCH] DT_GNU_HASH: ~ 50% dynamic linking improvement
- References: <20060628170900.GX3823@sunsite.mff.cuni.cz> <87odwdyv8n.fsf@penguin.cs.ucla.edu>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Wed, Jun 28, 2006 at 11:45:44AM -0700, Paul Eggert wrote:
> Jakub Jelinek <jakub@redhat.com> writes:
>
> > (Dan Bernstein's string hash function posted eons ago on comp.lang.c.)
>
> Bernstein now prefers XOR, i.e.,
> "h = h * 33 ^ c;" instead of
> "h = h * 33 + c;". Did you try that as well?
> Several sources indicate it's a bit better, e.g.,
> <http://eternallyconfuzzled.com/tuts/hashing.html#djb2>.
>
> > We have tested a bunch of different hash functions
>
> Which hash functions did you try? One-at-a-time? FNV? You'll
> probably get hassled by hash triviists (like me :-) no matter which
> function you choose, but you can forstall that to some extent by
> mentioning which functions you tested.
Ok, added a few further functions from the URL you referenced (previously
just had sysv, libiberty, dcache, djb2 and sdbm).
The number of collisions in the 537403 symbols is:
name 2sym collision # 3sym collision # more than 3sym collision #
sysv 1749 5
libiberty 42
dcache 37
djb2 29
sdbm 39
djb3 31
rot 337 39 61
sax 34
fnv 40
oat 30
Speed and number of collisions on the set of all symbols with
full 32-bit hash values (i.e. not modulo nbuckets) are the most important
factors in this case, as handling such collisions is expensive.
Also, djb2 (i.e. Bernstein with +) gave up max common prefix len
3, while djb3 gives 4, fnv 4 as well, oat and sax too.
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
unsigned int
elf_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int g, h = 0;
int ch;
while ((ch = *name++) != '\0')
{
h = (h << 4) + ch;
if ((g = (h & 0xf0000000)) != 0)
{
h ^= g >> 24;
h ^= g;
}
}
return h;
}
unsigned int
libiberty_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 0;
int ch;
while ((ch = *name++) != '\0')
h = h * 67 + ch - 113;
return h;
}
unsigned int
dcache_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 0;
int ch;
while ((ch = *name++) != '\0')
h = (h + (ch << 4) + (ch >> 4)) * 11;
return h;
}
unsigned int
djb2_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 5381;
int ch;
while ((ch = *name++) != '\0')
h = (h << 5) + h + ch;
return h;
}
unsigned int
sdbm_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 0;
int ch;
while ((ch = *name++) != '\0')
h = ch + (h << 6) + (h << 16) - h;
return h;
}
unsigned int
djb3_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 5381;
int ch;
while ((ch = *name++) != '\0')
h = ((h << 5) + h) ^ ch;
return h;
}
unsigned int
rot_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 0;
int ch;
while ((ch = *name++) != '\0')
h = ch ^ (h << 4) ^ (h >> 28);
return h;
}
unsigned int
sax_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 0;
int ch;
while ((ch = *name++) != '\0')
h ^= ch ^ (h << 5) + (h >> 2);
return h;
}
unsigned int
fnv_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 2166136261;
int ch;
while ((ch = *name++) != '\0')
h = (h * 16777619) ^ ch;
return h;
}
unsigned int
oat_hash (const char *namearg)
{
const unsigned char *name = (const unsigned char *) namearg;
unsigned int h = 2166136261;
int ch;
while ((ch = *name++) != '\0')
{
h += ch;
h += (h << 10);
h ^= (h >> 6);
}
h += (h << 3);
h ^= (h >> 11);
h += (h << 15);
return h;
}
int
main (int argc, char **argv)
{
char *line = NULL;
size_t len = 0;
ssize_t n;
unsigned int (*hash) (const char *);
if (strcmp (argv[1], "sysv") == 0)
hash = elf_hash;
else if (strcmp (argv[1], "libiberty") == 0)
hash = libiberty_hash;
else if (strcmp (argv[1], "dcache") == 0)
hash = dcache_hash;
else if (strcmp (argv[1], "djb2") == 0)
hash = djb2_hash;
else if (strcmp (argv[1], "sdbm") == 0)
hash = sdbm_hash;
else if (strcmp (argv[1], "djb3") == 0)
hash = djb3_hash;
else if (strcmp (argv[1], "rot") == 0)
hash = rot_hash;
else if (strcmp (argv[1], "sax") == 0)
hash = sax_hash;
else if (strcmp (argv[1], "fnv") == 0)
hash = fnv_hash;
else if (strcmp (argv[1], "oat") == 0)
hash = oat_hash;
else
return 1;
while ((n = getline (&line, &len, stdin)) > 0)
{
if (line[n - 1] == '\n')
line[n - 1] = '\0';
printf ("%08x %s\n", hash (line), line);
}
return 0;
}
Jakub