PLEASE TEST: New implementation of blocking socket I/O

Pierre A. Humblet Pierre.Humblet@ieee.org
Thu Apr 1 03:07:00 GMT 2004


At 05:36 PM 3/31/2004 +0200, Corinna Vinschen wrote:
>On Mar 31 09:55, Pierre A. Humblet wrote:
>> I am hesitant to start sinking time in this. If I understand correctly you
>> are trying to fix a race. They tend to go away while running under gdb.
>
>The race is gone with this code.  It's no the problem to debug it
>with gdb.
>
>> In 310663 = write (3, 0x7DE810, 1), it's likely that 310663 is larger
>> than the buffer size. Where can that come from? Can you add a trap?
>
>There's a chance that the first call to WSAwhatever already returns
>WSAEWOULDBLOCK and ret is uninitialized.  I've applied a fix which
>always set ret to 0 in calls to WSAwhatever.  Does that help?

It makes a difference, but I am not sure why, it shouldn't be necessary
AFAICS. At least I more or less consistently saw:

  264 5385988 [main] cvs 91960353 __set_errno: void
__set_winsock_errno(const char*, int):366 val 1
  154 5386142 [main] cvs 91960353 __set_winsock_errno: sendmsg:1100 -
winsock error 6 -> errno 1

When stepping through the code I noticed that wsock_event::release
was calling WSACloseEvent. Later the wsock_event destructor was calling
WSACloseEvent again, setting the wsock errno. So I fixed that, moving both
wsock_event::release and the destructor out of the way (in a crude way),
so as not to affect the wsock errno.

Then I started seeing

  279 3295264 [main] cvs 295057 writev: 1024 = write (3, 0x7DE7E0, 1), errno 2
  344 3295608 [main] cvs 295057 sig_dispatch_pending: exit_state 0, cur
thread id 0xFFF0A123, sigtid 0xFFEF130B, sigq.start.next 0x0
  188 3295796 [main] cvs 295057 writev: writev (3, 0x7DE810, 1)
  433 3296229 [main] cvs 295057 __set_errno: void __set_winsock_errno(const
char*, int):367 val 0
  209 3296438 [main] cvs 295057 __set_winsock_errno: sendmsg:1102 - winsock
error 0 -> errno 0

i.e. it fails but winsock_errno is 0. I made a couple more changes in
wsock_event::wait where the code could wipe out the wsock_errno (see patches).

No luck, wsock errno is still 0, but sendmsg surely fails.

When I put a breakpoint in the wait loop, nothing bad happens.
Debugging non-blocking and wait stuff with gdb is a long shot.

And yes, from time to time it does like in my first report, hanging
  158 2552618 [main] cvs 540017 readv: readv (4, 0x7DEBC0, 1) blocking,
sigcatchers 6
  154 2552772 [main] cvs 540017 readv: no need to call ready_for_read

I have to stop for now, I need to sleep over this anyway.
 
Perhaps you would have more luck triggering this bug if
you tried a low speed (modem) connection.

Pierre

P.S.: The diff is FYI, not meant to be permanent.  
-------------- next part --------------
Index: wsock_event.h
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/wsock_event.h,v
retrieving revision 1.2
diff -u -p -r1.2 wsock_event.h
--- wsock_event.h	29 Mar 2004 19:41:17 -0000	1.2
+++ wsock_event.h	1 Apr 2004 02:47:36 -0000
@@ -16,6 +16,7 @@ class wsock_event
   WSAEVENT		event;
 public:
   wsock_event () : event (NULL) {};
+  bool exists () { return event ; }
   ~wsock_event ()
     {
       if (event)
Index: fhandler_socket.cc
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/fhandler_socket.cc,v
retrieving revision 1.125
diff -u -p -r1.125 fhandler_socket.cc
--- fhandler_socket.cc	31 Mar 2004 15:33:33 -0000	1.125
+++ fhandler_socket.cc	1 Apr 2004 02:47:39 -0000
@@ -964,6 +964,9 @@ fhandler_socket::sendto (const void *ptr
   else
     res = ret;

+  if (res > len)
+      try_to_debug(0);
+
   /* Special handling for EPIPE and SIGPIPE.

      EPIPE is generated if the local end has been shut down on a connection
@@ -980,6 +983,7 @@ fhandler_socket::sendto (const void *ptr
   return res;
 }

+int count = 0;
 int
 fhandler_socket::sendmsg (const struct msghdr *msg, int flags, ssize_t tot)
 {
@@ -1063,6 +1067,7 @@ fhandler_socket::sendmsg (const struct m
       }

       DWORD ret;
+      wsock_event wsock_evt;

       if (is_nonblocking () || has_been_closed)
 	res = WSASendTo (get_socket (), wsabuf, iovcnt, (ret = 0, &ret),
@@ -1070,7 +1075,6 @@ fhandler_socket::sendmsg (const struct m
 			 msg->msg_namelen, NULL, NULL);
       else
 	{
-	  wsock_event wsock_evt;
 	  if (wsock_evt.prepare (get_socket (), FD_CLOSE | FD_WRITE))
 	    {
               do
@@ -1082,6 +1086,7 @@ fhandler_socket::sendmsg (const struct m
                   if (res != SOCKET_ERROR
                       || WSAGetLastError () != WSAEWOULDBLOCK)
                     break;
+		  count++;
                   if (ret > 0)
                     {
                       res = 0;
@@ -1089,7 +1094,6 @@ fhandler_socket::sendmsg (const struct m
                     }
                 }
               while (!(res = wsock_evt.wait (get_socket (), has_been_closed)));
-	      wsock_evt.release (get_socket ());
 	    }
 	}

@@ -1097,7 +1101,14 @@ fhandler_socket::sendmsg (const struct m
 	set_winsock_errno ();
       else
 	res = ret;
+
+      if (wsock_evt.exists ())
+	wsock_evt.release (get_socket ());
     }
+
+  if (res > tot)
+      try_to_debug(0);
+

   /* Special handling for EPIPE and SIGPIPE.

Index: net.cc
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/net.cc,v
retrieving revision 1.165
diff -u -p -r1.165 net.cc
--- net.cc	31 Mar 2004 12:04:07 -0000	1.165
+++ net.cc	1 Apr 2004 02:47:42 -0000
@@ -75,35 +75,37 @@ wsock_event::wait (int sock, int &closed
     {
       case WSA_WAIT_EVENT_0:
         WSANETWORKEVENTS evts;
-	memset (&evts, 0, sizeof evts);
-	WSAEnumNetworkEvents (sock, event, &evts);
-	if (evts.lNetworkEvents & FD_READ)
+	memset (&evts, 0, sizeof evts);  /* IS THIS NEEDED? */
+	if (!WSAEnumNetworkEvents (sock, event, &evts))
 	  {
-	    if (evts.iErrorCode[FD_READ_BIT])
-	      wsa_err = evts.iErrorCode[FD_READ_BIT];
-	    else
-	      ret = 0;
+	    if (evts.lNetworkEvents & FD_READ)
+	      {
+		if (evts.iErrorCode[FD_READ_BIT])
+		  wsa_err = evts.iErrorCode[FD_READ_BIT];
+		else
+		  ret = 0;
+	      }
+	    else if (evts.lNetworkEvents & FD_WRITE)
+	      {
+		if (evts.iErrorCode[FD_WRITE_BIT])
+		  wsa_err = evts.iErrorCode[FD_WRITE_BIT];
+		else
+		  ret = 0;
+	      }
+	    if (evts.lNetworkEvents & FD_CLOSE)
+	      {
+		closed = 1;
+		if (!wsa_err && evts.iErrorCode[FD_CLOSE_BIT])
+		  wsa_err = evts.iErrorCode[FD_CLOSE_BIT];
+		else
+		  ret = 0;
+	      }
+	    if (wsa_err)
+	      WSASetLastError (wsa_err);
 	  }
-	else if (evts.lNetworkEvents & FD_WRITE)
-	  {
-	    if (evts.iErrorCode[FD_WRITE_BIT])
-	      wsa_err = evts.iErrorCode[FD_WRITE_BIT];
-	    else
-	      ret = 0;
-	  }
-	if (evts.lNetworkEvents & FD_CLOSE)
-	  {
-	    closed = 1;
-	    if (!wsa_err && evts.iErrorCode[FD_CLOSE_BIT])
-	      wsa_err = evts.iErrorCode[FD_CLOSE_BIT];
-	    else
-	      ret = 0;
-	  }
-	if (wsa_err)
-	  WSASetLastError (wsa_err);
 	break;
       case WSA_WAIT_EVENT_0 + 1:
-        WSASetLastError (WSAEINTR);
+	WSASetLastError (WSAEINTR);
 	break;
       default:
 	WSASetLastError (WSAEFAULT);
@@ -116,6 +118,7 @@ wsock_event::release (int sock)
 {
   WSAEventSelect (sock, event, 0);
   WSACloseEvent (event);
+  event = NULL;
   unsigned long non_block = 0;
   ioctlsocket (sock, FIONBIO, &non_block);
 }


More information about the Cygwin-developers mailing list