[PATCH 06/18] linux: Only use 64-bit syscall if required for select

Adhemerval Zanella adhemerval.zanella@linaro.org
Thu Jun 17 11:50:52 GMT 2021


For !__ASSUME_TIME64_SYSCALLS there is no need to issue a 64-bit syscall
if the provided timeout fits in a 32-bit one.  The 64-bit usage should
be rare since the timeout is a relative one.  This also avoids the need
to use supports_time64() (which breaks use cases such as live migration
with CRIU or similar tools).

It also fixes an issue in the 32-bit select call for !__ASSUME_PSELECT
(microblaze with older kernels only), where the expected timeout
is a 'struct timeval' instead of a 'struct timespec'.

Checked on i686-linux-gnu on a 4.15 kernel and on a 5.11 kernel
(with and without --enable-kernel=5.1) and on x86_64-linux-gnu.
---
 include/sys/select.h               |  5 +++
 misc/Makefile                      |  2 +
 misc/tst-select.c                  | 39 +++++++++++--------
 sysdeps/unix/sysv/linux/Makefile   |  2 +-
 sysdeps/unix/sysv/linux/select.c   | 60 ++++++++++--------------------
 sysdeps/unix/sysv/linux/select32.c | 58 +++++++++++++++++++++++++++++
 6 files changed, 109 insertions(+), 57 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/select32.c

diff --git a/include/sys/select.h b/include/sys/select.h
index ec073deeba..a8961afbed 100644
--- a/include/sys/select.h
+++ b/include/sys/select.h
@@ -21,6 +21,11 @@ extern int __pselect32 (int __nfds, fd_set *__readfds,
 			const struct __timespec64 *__timeout,
 			const __sigset_t *__sigmask)
   attribute_hidden;
+extern int __select32 (int __nfds, fd_set *__readfds,
+		       fd_set *__writefds, fd_set *__exceptfds,
+		       const struct __timespec64 *ts64,
+		       struct __timeval64 *timeout)
+  attribute_hidden;
 
 extern int __select64 (int __nfds, fd_set *__readfds,
 		       fd_set *__writefds, fd_set *__exceptfds,
diff --git a/misc/Makefile b/misc/Makefile
index fa40bf0e11..66586bcc7e 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -169,5 +169,7 @@ $(objpfx)tst-allocate_once-mem.out: $(objpfx)tst-allocate_once.out
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-allocate_once.mtrace > $@; \
 	$(evaluate-test)
 
+$(objpfx)tst-select: $(librt)
+$(objpfx)tst-select-time64: $(librt)
 $(objpfx)tst-pselect: $(librt)
 $(objpfx)tst-pselect-time64: $(librt)
diff --git a/misc/tst-select.c b/misc/tst-select.c
index 52aa26651f..134eed99be 100644
--- a/misc/tst-select.c
+++ b/misc/tst-select.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <errno.h>
+#include <intprops.h>
 #include <support/capture_subprocess.h>
 #include <support/check.h>
 #include <support/support.h>
@@ -31,12 +32,6 @@ struct child_args
   struct timeval tmo;
 };
 
-static void
-alarm_handler (int signum)
-{
-  /* Do nothing.  */
-}
-
 static void
 do_test_child (void *clousure)
 {
@@ -69,17 +64,20 @@ do_test_child (void *clousure)
 static void
 do_test_child_alarm (void *clousure)
 {
-  struct sigaction act = { .sa_handler = alarm_handler };
-  xsigaction (SIGALRM, &act, NULL);
-  alarm (1);
+  struct child_args *args = (struct child_args *) clousure;
 
-  struct timeval tv = { .tv_sec = 10, .tv_usec = 0 };
+  support_create_timer (0, 100000000, false, NULL);
+  struct timeval tv = { .tv_sec = args->tmo.tv_sec, .tv_usec = 0 };
   int r = select (0, NULL, NULL, NULL, &tv);
   TEST_COMPARE (r, -1);
-  TEST_COMPARE (errno, EINTR);
-
-  if (support_select_modifies_timeout ())
-    TEST_VERIFY (tv.tv_sec < 10);
+  if (args->tmo.tv_sec > INT_MAX)
+    TEST_VERIFY (errno == EINTR || errno == EOVERFLOW);
+  else
+    {
+      TEST_COMPARE (errno, EINTR);
+      if (support_select_modifies_timeout ())
+       TEST_VERIFY (tv.tv_sec < args->tmo.tv_sec);
+    }
 }
 
 static int
@@ -121,13 +119,24 @@ do_test (void)
   xclose (args.fds[0][0]);
   xclose (args.fds[1][1]);
 
+  args.tmo = (struct timeval) { .tv_sec = 10, .tv_usec = 0 };
+  {
+    struct support_capture_subprocess result;
+    result = support_capture_subprocess (do_test_child_alarm, &args);
+    support_capture_subprocess_check (&result, "tst-select-child", 0,
+				      sc_allow_none);
+  }
+
+  args.tmo = (struct timeval) { .tv_sec = TYPE_MAXIMUM (time_t),
+				.tv_usec = 0 };
   {
     struct support_capture_subprocess result;
-    result = support_capture_subprocess (do_test_child_alarm, NULL);
+    result = support_capture_subprocess (do_test_child_alarm, &args);
     support_capture_subprocess_check (&result, "tst-select-child", 0,
 				      sc_allow_none);
   }
 
+  args.tmo = (struct timeval) { .tv_sec = 0, .tv_usec = 0 };
   {
     fd_set rfds;
     FD_ZERO (&rfds);
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index c36ea0e494..710169a454 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -61,7 +61,7 @@ sysdep_routines += adjtimex clone umount umount2 readahead sysctl \
 		   open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \
 		   timerfd_gettime timerfd_settime prctl \
 		   process_vm_readv process_vm_writev clock_adjtime \
-		   time64-support pselect32 \
+		   time64-support pselect32 select32 \
 		   xstat fxstat lxstat xstat64 fxstat64 lxstat64 \
 		   fxstatat fxstatat64 \
 		   xmknod xmknodat convert_scm_timestamps
diff --git a/sysdeps/unix/sysv/linux/select.c b/sysdeps/unix/sysv/linux/select.c
index dc16a816ed..2d2a7fa720 100644
--- a/sysdeps/unix/sysv/linux/select.c
+++ b/sysdeps/unix/sysv/linux/select.c
@@ -21,7 +21,6 @@
 #include <sys/select.h>
 #include <errno.h>
 #include <sysdep-cancel.h>
-#include <time64-support.h>
 
 /* Check the first NFDS descriptors each in READFDS (if not NULL) for read
    readiness, in WRITEFDS (if not NULL) for write readiness, and in EXCEPTFDS
@@ -65,53 +64,32 @@ __select64 (int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
 #ifndef __NR_pselect6_time64
 # define __NR_pselect6_time64 __NR_pselect6
 #endif
+
+#ifdef __ASSUME_TIME64_SYSCALLS
+  int r = SYSCALL_CANCEL (pselect6_time64, nfds, readfds, writefds, exceptfds,
+			  pts64, NULL);
+  if (timeout != NULL)
+    TIMESPEC_TO_TIMEVAL (timeout, pts64);
+  return r;
+#else
+  bool is32bit = timeout != NULL
+		 ? in_time_t_range (timeout->tv_sec) : true;
   int r;
-  if (supports_time64 ())
+  if (!is32bit)
     {
-      r = SYSCALL_CANCEL (pselect6_time64, nfds, readfds, writefds, exceptfds,
-			  pts64, NULL);
-      /* Linux by default will update the timeout after a pselect6 syscall
-         (though the pselect() glibc call suppresses this behavior).
-         Since select() on Linux has the same behavior as the pselect6
-         syscall, we update the timeout here.  */
-      if (r >= 0 || errno != ENOSYS)
+      r = SYSCALL_CANCEL (pselect6_time64, nfds, readfds, writefds,
+			      exceptfds, pts64, NULL);
+      if ((r >= 0 || errno != ENOSYS) && timeout != NULL)
 	{
-	  if (timeout != NULL)
-	    TIMESPEC_TO_TIMEVAL (timeout, &ts64);
-	  return r;
+	  TIMESPEC_TO_TIMEVAL (timeout, &ts64);
 	}
-
-      mark_time64_unsupported ();
+      else
+	__set_errno (EOVERFLOW);
+      return r;
     }
 
-#ifndef __ASSUME_TIME64_SYSCALLS
-  struct timespec ts32, *pts32 = NULL;
-  if (pts64 != NULL)
-    {
-      if (! in_time_t_range (pts64->tv_sec))
-	{
-	  __set_errno (EINVAL);
-	  return -1;
-	}
-      ts32.tv_sec = s;
-      ts32.tv_nsec = ns;
-      pts32 = &ts32;
-    }
-# ifndef __ASSUME_PSELECT
-#  ifdef __NR__newselect
-#   undef __NR_select
-#   define __NR_select __NR__newselect
-#  endif
-  r = SYSCALL_CANCEL (select, nfds, readfds, writefds, exceptfds, pts32);
-# else
-  r = SYSCALL_CANCEL (pselect6, nfds, readfds, writefds, exceptfds, pts32,
-		      NULL);
-# endif
-  if (timeout != NULL)
-    *timeout = valid_timespec_to_timeval64 (ts32);
+  return __select32 (nfds, readfds, writefds, exceptfds, pts64, timeout);
 #endif
-
-  return r;
 }
 
 #if __TIMESIZE != 64
diff --git a/sysdeps/unix/sysv/linux/select32.c b/sysdeps/unix/sysv/linux/select32.c
new file mode 100644
index 0000000000..b7e122fe2c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/select32.c
@@ -0,0 +1,58 @@
+/* Synchronous I/O multiplexing.  Linux 32-bit time fallback.
+   Copyright (C) 2020-2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sys/select.h>
+#include <sysdep-cancel.h>
+
+#ifndef __ASSUME_TIME64_SYSCALLS
+
+int
+__select32 (int nfds, fd_set *readfds, fd_set *writefds,
+	    fd_set *exceptfds, const struct __timespec64 *ts64,
+	    struct __timeval64 *timeout)
+{
+#ifdef __ASSUME_PSELECT
+  struct timespec ts32, *pts32 = NULL;
+  if (ts64 != NULL)
+    {
+      ts32.tv_sec = ts64->tv_sec;
+      ts32.tv_nsec = ts64->tv_nsec;
+      pts32 = &ts32;
+    }
+
+  int r = SYSCALL_CANCEL (pselect6, nfds, readfds, writefds, exceptfds, pts32,
+			  NULL);
+  if (timeout != NULL)
+    TIMESPEC_TO_TIMEVAL (timeout, pts32);
+  return r;
+#else
+  struct timeval tv32, *ptv32 = NULL;
+  if (ts64 != NULL)
+    {
+      tv32 = valid_timespec64_to_timeval (*ts64);
+      ptv32 = &tv32;
+    }
+
+  int r = SYSCALL_CANCEL (select, nfds, readfds, writefds, exceptfds, ptv32);
+  if (timeout != NULL)
+    *timeout = valid_timeval_to_timeval64 (tv32);
+  return r;
+#endif /* __ASSUME_PSELECT  */
+}
+
+#endif
-- 
2.30.2



More information about the Libc-alpha mailing list