This function caused a crash in 2.3.6 and I checked the trunk and it looks like it is still the same. Unfortunately, this is on a embedded system and it would be extremely difficult to try and replace the glibc with the more current version to verify the crash on the trunk. It looks like the variable num_ifs can be modified while it is being written to in another thread. The lock should also protect the num_ifs variable so that it can only be modified by a single thread. Modified the code to have the lock and unlock outside of the if statement and that stopped the crash. output from gdb along with stack trace: (gdb) info local ifr = (struct ifreq *) 0x506df0 i = 2 cur_ifr = (struct ifreq *) 0x506e18 sd = 14 num = 2 j = -1 num_ifs = 4 __PRETTY_FUNCTION__ = "_res_hconf_reorder_addrs" (gdb) bt #0 0x00002b54a31d1949 in kill () from /lib64/libc.so.6 #1 0x00002b54a2f034b4 in pthread_kill (thread=<value optimized out>, signo=6) at signals.c:69 #2 0x00002b54a2f034e3 in __pthread_raise (sig=6) at signals.c:200 #3 0x00002b54a31d16c1 in *__GI_raise (sig=6) at ../linuxthreads/sysdeps/unix/sysv/linux/raise.c:32 #4 0x00002b54a320a7ed in _int_realloc (av=0x2b54a33bb460, oldmem=0x50c520, bytes=<value optimized out>) at malloc.c:4587 #5 0x00002b54a320b894 in *__GI___libc_realloc (oldmem=0x50c520, bytes=48) at malloc.c:3487 #6 0x00002b54a325fe64 in _res_hconf_reorder_addrs (hp=0x2b54a35c0eb0) at res_hconf.c:589 #7 0x00002b54a32654dd in __gethostbyname_r (name=0x503314 "www.google.com", resbuf=0x0, buffer=0x2b54a35c0670 "?\b\\�T+", buflen=2048, result=0x2b54a35c0ed0, h_errnop=0x2b54a35c0edc) at ../nss/getXXbyYY_r.c:236 #8 0x00000000004017b5 in getipaddr (arg=<value optimized out>) at src/dns_calls.c:118 #9 0x00002b54a2f00ba3 in pthread_start_thread (arg=<value optimized out>) at manager.c:310 #10 0x00002b54a2f00bfe in pthread_start_thread_event (arg=0x506780) at manager.c:334 #11 0x00002b54a32545c3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112 #12 0x0000000000000000 in ?? () patch: diff -ur glibc-2.3.5/resolv/res_hconf.c glibc-2.3.5_mine/resolv/res_hconf.c --- glibc-2.3.5/resolv/res_hconf.c 2004-06-01 15:17:39.000000000 -0700 +++ glibc-2.3.5_mine/resolv/res_hconf.c 2007-11-14 14:37:40.000000000 -0800 @@ -538,6 +538,10 @@ if (hp->h_addrtype != AF_INET) return; + /* Lock moved to the outside so noone can modify num_ifs */ + /* Get lock. */ + __libc_lock_lock (lock); + if (num_ifs <= 0) { struct ifreq *ifr, *cur_ifr; @@ -545,17 +549,21 @@ /* Save errno. */ int save = errno; - /* Initialize interface table. */ - num_ifs = 0; /* The SIOCGIFNETMASK ioctl will only work on an AF_INET socket. */ sd = __socket (AF_INET, SOCK_DGRAM, 0); if (sd < 0) + { + /* Unlock before returning */ + __libc_lock_unlock (lock); return; + } + - /* Get lock. */ - __libc_lock_lock (lock); + + /* Initialize interface table. */ + num_ifs = 0; /* Get a list of interfaces. */ __ifreq (&ifr, &num, sd); @@ -595,9 +603,10 @@ cleanup: /* Release lock, preserve error value, and close socket. */ save = errno; - __libc_lock_unlock (lock); __close (sd); } + /* Let other people in */ + __libc_lock_unlock (lock); if (num_ifs == 0) return;
The proposed patch would cause huge performance penalties. I've checked in another patch without this problem.