This is the mail archive of the glibc-bugs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug libc/2571] New: getaddrinfo and gethostbyname_r crash on SMP machines


I experienced that getaddrinfo and gethostbyname_r randomly crash.
The following shows how to reproduce.
Probably you need a SMP machine to hit the bug.

$ uname -a
Linux ______ 2.6.16-1.2080_FC5 #1 SMP Tue Mar 28 03:38:47 EST 2006 x86_64 x86_64
x86_64 GNU/Linux
$ cat gethostby.c

#include <stdio.h>
#include <stdlib.h>
#include <netdb.h>
#include <pthread.h>

static void *
resolve_test_thread_main(void *thrarg)
{
  struct hostent h, *hp = NULL;
  int herr;
  char buffer[1024];
  if (gethostbyname_r("localhost", &h, buffer, 1024, &hp, &herr) != 0) {
    abort();
  }
  return NULL;
}

int main()
{
  int i;
  pthread_t thrs[100];
  const int num = 10;
  for (i = 0; i < num; ++i) {
    if (pthread_create(thrs + i, NULL, resolve_test_thread_main, NULL) != 0) {
      abort();
    }
  }
  for (i = 0; i < num; ++i) {
    void *thret;
    if (pthread_join(thrs[i], &thret) != 0) {
      abort();
    }
  }
  return 0;
}

$ gcc -I/opt/glibc/include/ -L /opt/glibc/lib/ -Wl,--dynamic-linker=/opt/glibc/l
ib/ld-2.4.so gethostby.c -o gethostby -lpthread
$ while ./gethostby ; do true; done;
Segmentation fault (core dumped)
$ gdb ./gethostby core.21289
GNU gdb Red Hat Linux (6.3.0.0-1.122rh)
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu"...Using host libthread_db
library "/lib64/libthread_db.so.1".

Core was generated by `./gethostby'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /opt/glibc/lib/libpthread.so.0...done.
Loaded symbols for /opt/glibc/lib/libpthread.so.0
Reading symbols from /opt/glibc/lib/libc.so.6...done.
Loaded symbols for /opt/glibc/lib/libc.so.6
Reading symbols from /opt/glibc/lib/ld-2.4.so...done.
Loaded symbols for /opt/glibc/lib/ld-2.4.so
#0  0x00002aaaaadc4f76 in get_mapping (type=GETFDHST,
    key=0x2aaaaaddf4d1 "hosts", mappedp=0x2aaaaaf07078) at nscd_helper.c:310
310       if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
(gdb) p oldval
$1 = (struct mapped_database *) 0xffffffffffffffff
(gdb) bt
#0  0x00002aaaaadc4f76 in get_mapping (type=GETFDHST,
    key=0x2aaaaaddf4d1 "hosts", mappedp=0x2aaaaaf07078) at nscd_helper.c:310
#1  0x00002aaaaadc51c3 in __nscd_get_map_ref (type=GETFDHST,
    name=0x2aaaaaddf4d1 "hosts", mapptr=0x2aaaaaf07070, gc_cyclep=0x409ffc3c)
    at nscd_helper.c:343
#2  0x00002aaaaadc3a19 in nscd_gethst_r (key=0x4007b8 "localhost", keylen=10,
    type=GETHOSTBYNAME, resultbuf=0x40a00180, buffer=0x409ffd70 "",
    buflen=1024, result=0x40a00178, h_errnop=0x40a00174) at nscd_gethst_r.c:117
#3  0x00002aaaaadc420a in __nscd_gethostbyname_r (name=0x4007b8 "localhost",
    resultbuf=0x40a00180, buffer=0x409ffd70 "", buflen=1024,
    result=0x40a00178, h_errnop=0x40a00174) at nscd_gethst_r.c:52
#4  0x00002aaaaadab708 in __gethostbyname_r (name=0x4007b8 "localhost",
resbuf=Variable "resbuf" is not available.

) at ../nss/getXXbyYY_r.c:162
#5  0x0000000000400620 in resolve_test_thread_main ()
#6  0x00002aaaaabcc367 in start_thread (arg=Variable "arg" is not available.
) at pthread_create.c:274
#7  0x00002aaaaad97b3d in clone () from /opt/glibc/lib/libc.so.6
#8  0x0000000000000000 in ?? ()
(gdb)


The crash is caused at 'oldval->counter' wherevoldval == NO_MAPPING.
This is very odd because get_mapping() is not called
if mapptr->mapped == NO_MAPPING (in __nscd_get_map_ref()). I disassembled
nscd_helper.o and found that, in __nscd_get_map_ref(), the line
  cur = mapptr->mapped;
after
  while (atomic_compare_and_exchange_val_acq(&mapptr->lock, 1, 0) != 0)
is optimized away. Because atomic_xxx_acq operations have no "memory" clobber,
gcc thinks 'cur' need not to be reloaded from mapptr->mapped.

The following patch fixes the problem. I changed the line
  mapptr->lock = 0;
as well, because this store operation must be with release semantics (or
appending write memory barrier before the store would be ok).

I only appended "memory" for i386 and x86_64. I'm not sure if we need to
change for other architectures also.

--- sysdeps/i386/i486/bits/atomic.h.org	2006-04-13 17:30:57.000000000 +0900
+++ sysdeps/i386/i486/bits/atomic.h	2006-04-13 17:49:28.000000000 +0900
@@ -59,21 +59,24 @@
   ({ __typeof (*mem) ret;						      \
      __asm __volatile (LOCK_PREFIX "cmpxchgb %b2, %1"			      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : "q" (newval), "m" (*mem), "0" (oldval));	      \
+		       : "q" (newval), "m" (*mem), "0" (oldval)		      \
+		       : "memory");					      \
      ret; })
 
 #define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \
   ({ __typeof (*mem) ret;						      \
      __asm __volatile (LOCK_PREFIX "cmpxchgw %w2, %1"			      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : "r" (newval), "m" (*mem), "0" (oldval));	      \
+		       : "r" (newval), "m" (*mem), "0" (oldval)		      \
+		       : "memory");					      \
      ret; })
 
 #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
   ({ __typeof (*mem) ret;						      \
      __asm __volatile (LOCK_PREFIX "cmpxchgl %2, %1"			      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : "r" (newval), "m" (*mem), "0" (oldval));	      \
+		       : "r" (newval), "m" (*mem), "0" (oldval)		      \
+		       : "memory");					      \
      ret; })
 
 /* XXX We do not really need 64-bit compare-and-exchange.  At least
@@ -98,7 +101,8 @@
 			 "c" (((unsigned long long int) (newval)) >> 32),     \
 			 "m" (*mem), "a" (((unsigned long long int) (oldval)) \
 					  & 0xffffffff),		      \
-			 "d" (((unsigned long long int) (oldval)) >> 32));    \
+			 "d" (((unsigned long long int) (oldval)) >> 32)      \
+		       : "memory");					      \
      ret; })
 # else
 #  define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
@@ -110,7 +114,8 @@
 			 "c" (((unsigned long long int) (newval)) >> 32),     \
 			 "m" (*mem), "a" (((unsigned long long int) (oldval)) \
 					  & 0xffffffff),		      \
-			 "d" (((unsigned long long int) (oldval)) >> 32));    \
+			 "d" (((unsigned long long int) (oldval)) >> 32)      \
+		       : "memory");					      \
      ret; })
 # endif
 #endif
@@ -122,15 +127,18 @@
      if (sizeof (*mem) == 1)						      \
        __asm __volatile ("xchgb %b0, %1"				      \
 			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (newvalue), "m" (*mem));			      \
+			 : "0" (newvalue), "m" (*mem)			      \
+			 : "memory");					      \
      else if (sizeof (*mem) == 2)					      \
        __asm __volatile ("xchgw %w0, %1"				      \
 			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (newvalue), "m" (*mem));			      \
+			 : "0" (newvalue), "m" (*mem)			      \
+			 : "memory");					      \
      else if (sizeof (*mem) == 4)					      \
        __asm __volatile ("xchgl %0, %1"					      \
 			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (newvalue), "m" (*mem));			      \
+			 : "0" (newvalue), "m" (*mem)			      \
+			 : "memory");					      \
      else								      \
        {								      \
 	 result = 0;							      \
--- libc/sysdeps/x86_64/bits/atomic.h.org	2006-04-13 14:11:24.000000000 +0900
+++ libc/sysdeps/x86_64/bits/atomic.h	2006-04-13 14:14:14.000000000 +0900
@@ -59,21 +59,24 @@
   ({ __typeof (*mem) ret;						      \
      __asm __volatile (LOCK_PREFIX "cmpxchgb %b2, %1"			      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : "q" (newval), "m" (*mem), "0" (oldval));	      \
+		       : "q" (newval), "m" (*mem), "0" (oldval)		      \
+		       : "memory");					      \
      ret; })
 
 #define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \
   ({ __typeof (*mem) ret;						      \
      __asm __volatile (LOCK_PREFIX "cmpxchgw %w2, %1"			      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : "r" (newval), "m" (*mem), "0" (oldval));	      \
+		       : "r" (newval), "m" (*mem), "0" (oldval)		      \
+		       : "memory");					      \
      ret; })
 
 #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
   ({ __typeof (*mem) ret;						      \
      __asm __volatile (LOCK_PREFIX "cmpxchgl %2, %1"			      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : "r" (newval), "m" (*mem), "0" (oldval));	      \
+		       : "r" (newval), "m" (*mem), "0" (oldval)		      \
+		       : "memory");					      \
      ret; })
 
 #define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
@@ -81,7 +84,8 @@
      __asm __volatile (LOCK_PREFIX "cmpxchgq %q2, %1"			      \
 		       : "=a" (ret), "=m" (*mem)			      \
 		       : "r" ((long) (newval)), "m" (*mem),		      \
-			 "0" ((long) (oldval)));			      \
+			 "0" ((long) (oldval))				      \
+		       : "memory");					      \
      ret; })
 
 
@@ -91,19 +95,23 @@
      if (sizeof (*mem) == 1)						      \
        __asm __volatile ("xchgb %b0, %1"				      \
 			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (newvalue), "m" (*mem));			      \
+			 : "0" (newvalue), "m" (*mem)			      \
+			 : "memory");					      \
      else if (sizeof (*mem) == 2)					      \
        __asm __volatile ("xchgw %w0, %1"				      \
 			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (newvalue), "m" (*mem));			      \
+			 : "0" (newvalue), "m" (*mem)			      \
+			 : "memory");					      \
      else if (sizeof (*mem) == 4)					      \
        __asm __volatile ("xchgl %0, %1"					      \
 			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" (newvalue), "m" (*mem));			      \
+			 : "0" (newvalue), "m" (*mem)			      \
+			 : "memory");					      \
      else								      \
        __asm __volatile ("xchgq %q0, %1"				      \
 			 : "=r" (result), "=m" (*mem)			      \
-			 : "0" ((long) (newvalue)), "m" (*mem));	      \
+			 : "0" ((long) (newvalue)), "m" (*mem)		      \
+			 : "memory");					      \
      result; })
 
 
--- libc/nscd/nscd_helper.c.org	2006-04-13 10:17:12.000000000 +0900
+++ libc/nscd/nscd_helper.c	2006-04-13 17:22:04.000000000 +0900
@@ -352,7 +352,7 @@
 	}
     }
 
-  mapptr->lock = 0;
+  atomic_exchange_rel (&mapptr->lock, 0);
 
   return cur;
 }

-- 
           Summary: getaddrinfo and gethostbyname_r crash on SMP machines
           Product: glibc
           Version: 2.4
            Status: NEW
          Severity: critical
          Priority: P2
         Component: libc
        AssignedTo: drepper at redhat dot com
        ReportedBy: a-higuti at gray dot plala dot or dot jp
                CC: a-higuti at gray dot plala dot or dot jp,glibc-bugs at
                    sources dot redhat dot com


http://sourceware.org/bugzilla/show_bug.cgi?id=2571

------- You are receiving this mail because: -------
You are on the CC list for the bug, or are watching someone who is.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]