This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [PATCH 3/4] Add ILP32 support to aarch64

From: Steve Ellcey <sellcey at cavium dot com>
To: Joseph Myers <joseph at codesourcery dot com>, Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
Cc: "Ellcey, Steve" <Steve dot Ellcey at cavium dot com>, nd <nd at arm dot com>, "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>
Date: Fri, 04 Aug 2017 16:15:32 -0700
Subject: Re: [PATCH 3/4] Add ILP32 support to aarch64
Authentication-results: sourceware.org; auth=none
Authentication-results: spf=none (sender IP is ) smtp.mailfrom=Steve dot Ellcey at cavium dot com;
References: <DB6PR0801MB20533095035144673B49342083B10@DB6PR0801MB2053.eurprd08.prod.outlook.com> <alpine.DEB.2.20.1708040011260.23567@digraph.polyomino.org.uk>
Reply-to: sellcey at cavium dot com
Spamdiagnosticmetadata: NSPM
Spamdiagnosticoutput: 1:99

On Fri, 2017-08-04 at 00:12 +0000, Joseph Myers wrote:
> On Thu, 3 Aug 2017, Wilco Dijkstra wrote:
> 
> > The generic implementation may well be faster... I'm not sure where the
> > requirement of not raising inexact comes from (I don't see it in the definition
> > of lrint, and we generally don't care since inexact is set by almost every FP
> > calculation), but if it is absolutely required you'd special case values larger
> > than LONG_MAX.
> The requirement comes from lrint being bound to IEEE 754 conversion 
> operations, so only raising inexact under the conditions specified and no 
> spurious inexact.


Here is a new version of this patch.  It (mostly) avoids fenv calls
when not needed and preserves any exceptions that may be set on entry
to the function.

Steve Ellcey
sellcey@cavium.com


2017-08-04  Steve Ellcey  <sellcey@cavium.com>

	* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
	* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_llroundf.c: (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lrint.c (math_private.h, fenv.h, stdint.h):
	New includes.
	(IREG_SIZE, OREG_SIZE): Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
	(__CONCATX): Handle exceptions correctly on large values that may
	set FE_INVALID.
	* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
	Initialize if not already set.
        (OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.

diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@
 
 #define FUNC llrint
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@
 
 #define FUNC llround
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..19f9b5b 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <math_private.h>
 #include <math.h>
+#include <fenv.h>
+#include <stdint.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -24,18 +27,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
@@ -44,6 +66,33 @@ __CONCATX(__,FUNC) (ITYPE x)
 {
   OTYPE result;
   ITYPE temp;
+
+#if IREG_SIZE == 64 && OREG_SIZE == 32
+  if (__builtin_fabs (x) > INT32_MAX - 2)
+    {
+      /* Converting large values to a 32 bit in may cause the frintx/fcvtza
+	 sequence to set both FE_INVALID and FE_INEXACT.  To avoid this
+         we save and restore the FE and only set one or the other.  */
+
+      fenv_t env;
+      bool invalid_p, inexact_p;
+
+      libc_feholdexcept (&env);
+      asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
+	    "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+	    : "=r" (result), "=w" (temp) : "w" (x) );
+      invalid_p = libc_fetestexcept (FE_INVALID);
+      inexact_p = libc_fetestexcept (FE_INEXACT);
+      libc_fesetenv (&env);
+
+      if (invalid_p)
+	feraiseexcept (FE_INVALID);
+      else if (inexact_p)
+	feraiseexcept (FE_INEXACT);
+
+      return result;
+  }
+#endif
   asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
         "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
         : "=r" (result), "=w" (temp) : "w" (x) );
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lround.c>

Follow-Ups:
- Re: [PATCH 3/4] Add ILP32 support to aarch64
  - From: Szabolcs Nagy

References:
- Re: [PATCH 3/4] Add ILP32 support to aarch64
  - From: Wilco Dijkstra
- Re: [PATCH 3/4] Add ILP32 support to aarch64
  - From: Joseph Myers

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]