This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 3/4] Add ILP32 support to aarch64
- From: Steve Ellcey <sellcey at cavium dot com>
- To: Joseph Myers <joseph at codesourcery dot com>, Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
- Cc: "Ellcey, Steve" <Steve dot Ellcey at cavium dot com>, nd <nd at arm dot com>, "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>
- Date: Fri, 04 Aug 2017 16:15:32 -0700
- Subject: Re: [PATCH 3/4] Add ILP32 support to aarch64
- Authentication-results: sourceware.org; auth=none
- Authentication-results: spf=none (sender IP is ) smtp.mailfrom=Steve dot Ellcey at cavium dot com;
- References: <DB6PR0801MB20533095035144673B49342083B10@DB6PR0801MB2053.eurprd08.prod.outlook.com> <alpine.DEB.2.20.1708040011260.23567@digraph.polyomino.org.uk>
- Reply-to: sellcey at cavium dot com
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:99
On Fri, 2017-08-04 at 00:12 +0000, Joseph Myers wrote:
> On Thu, 3 Aug 2017, Wilco Dijkstra wrote:
>
> > The generic implementation may well be faster... I'm not sure where the
> > requirement of not raising inexact comes from (I don't see it in the definition
> > of lrint, and we generally don't care since inexact is set by almost every FP
> > calculation), but if it is absolutely required you'd special case values larger
> > than LONG_MAX.
> The requirement comes from lrint being bound to IEEE 754 conversion
> operations, so only raising inexact under the conditions specified and no
> spurious inexact.
Here is a new version of this patch. It (mostly) avoids fenv calls
when not needed and preserves any exceptions that may be set on entry
to the function.
Steve Ellcey
sellcey@cavium.com
2017-08-04 Steve Ellcey <sellcey@cavium.com>
* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
(IREG_SIZE, OREG_SIZE): New macros.
* sysdeps/aarch64/fpu/s_llroundf.c (OREGS, IREGS): Remove.
(IREG_SIZE, OREG_SIZE): New macros.
* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
(IREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
(IREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_lrint.c (math_private.h, fenv.h, stdint.h):
New includes.
(IREG_SIZE, OREG_SIZE): Initialize if not already set.
(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
(__CONCATX): Handle exceptions correctly on large values that may
set FE_INVALID.
* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
Initialize if not already set.
(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@
#define FUNC llrint
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@
#define FUNC llrintf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@
#define FUNC llround
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@
#define FUNC llroundf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..19f9b5b 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <math_private.h>
#include <math.h>
+#include <fenv.h>
+#include <stdint.h>
#ifndef FUNC
# define FUNC lrint
@@ -24,18 +27,37 @@
#ifndef ITYPE
# define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
#else
-# ifndef IREGS
-# error IREGS not defined
+# ifndef IREG_SIZE
+# error IREG_SIZE not defined
# endif
#endif
#ifndef OTYPE
# define OTYPE long int
+# ifdef __ILP32__
+# define OREG_SIZE 32
+# else
+# define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+# error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
#endif
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
#define __CONCATX(a,b) __CONCAT(a,b)
@@ -44,6 +66,33 @@ __CONCATX(__,FUNC) (ITYPE x)
{
OTYPE result;
ITYPE temp;
+
+#if IREG_SIZE == 64 && OREG_SIZE == 32
+ if (__builtin_fabs (x) > INT32_MAX - 2)
+ {
+ /* Converting large values to a 32 bit int may cause the frintx/fcvtzs
+ sequence to set both FE_INVALID and FE_INEXACT. To avoid this we save
+ and restore the FP environment and only raise one or the other. */
+
+ fenv_t env;
+ bool invalid_p, inexact_p;
+
+ libc_feholdexcept (&env);
+ asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
+ "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+ : "=r" (result), "=w" (temp) : "w" (x) );
+ invalid_p = libc_fetestexcept (FE_INVALID);
+ inexact_p = libc_fetestexcept (FE_INEXACT);
+ libc_fesetenv (&env);
+
+ if (invalid_p)
+ feraiseexcept (FE_INVALID);
+ else if (inexact_p)
+ feraiseexcept (FE_INEXACT);
+
+ return result;
+ }
+#endif
asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
"fcvtzs" "\t%" OREGS "0, %" IREGS "1"
: "=r" (result), "=w" (temp) : "w" (x) );
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@
#define FUNC lrintf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@
#ifndef ITYPE
# define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
#else
-# ifndef IREGS
-# error IREGS not defined
+# ifndef IREG_SIZE
+# error IREG_SIZE not defined
# endif
#endif
#ifndef OTYPE
# define OTYPE long int
+# ifdef __ILP32__
+# define OREG_SIZE 32
+# else
+# define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+# error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
#endif
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
#define __CONCATX(a,b) __CONCAT(a,b)
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@
#define FUNC lroundf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#include <s_lround.c>