This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH 4/4] powerpc: Remove f{max,min}{f} assembly implementations

From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
To: libc-alpha at sourceware dot org
Date: Mon, 26 Dec 2016 17:37:52 -0200
Subject: Re: [PATCH 4/4] powerpc: Remove f{max,min}{f} assembly implementations
Authentication-results: sourceware.org; auth=none
References: <1482156263-22267-1-git-send-email-adhemerval.zanella@linaro.org> <1482156263-22267-4-git-send-email-adhemerval.zanella@linaro.org>
PING.

On 19/12/2016 12:04, Adhemerval Zanella wrote:
> This patch removes the powerpc assembly implementations of fmax/fmin.
> Based on benchtests, the assembly versions show:
> 
> $ ./testrun.sh benchtests/bench-fmax
>   "fmax": {
>    "": {
>     "duration": 5.07586e+09,
>     "iterations": 2.01676e+09,
>     "max": 1350.39,
>     "min": 2.073,
>     "mean": 2.51684
>    },
>    "qNaN": {
>     "duration": 5.09315e+09,
>     "iterations": 8.4568e+08,
>     "max": 2788,
>     "min": 5.806,
>     "mean": 6.02255
>    },
>    "sNaN": {
>     "duration": 5.09073e+09,
>     "iterations": 8.42316e+08,
>     "max": 4215.84,
>     "min": 5.737,
>     "mean": 6.04373
>    }
>   }
> 
> And
> 
> $ ./testrun.sh benchtests/bench-fmin
>   "fmin": {
>    "": {
>     "duration": 5.07711e+09,
>     "iterations": 2.02982e+09,
>     "max": 497.094,
>     "min": 2.073,
>     "mean": 2.50126
>    },
>    "qNaN": {
>     "duration": 5.09134e+09,
>     "iterations": 8.46968e+08,
>     "max": 2255.14,
>     "min": 5.807,
>     "mean": 6.01125
>    },
>    "sNaN": {
>     "duration": 5.09122e+09,
>     "iterations": 8.4746e+08,
>     "max": 1969.38,
>     "min": 5.729,
>     "mean": 6.00763
>    }
>   }
> 
> The default implementation (math/s_f{max,min}_template.c) shows slightly better
> latency for all cases:
> 
> $ ./testrun.sh benchtests/bench-fmax
>   "fmax": {
>    "": {
>     "duration": 5.07044e+09,
>     "iterations": 2.38695e+09,
>     "max": 2048.58,
>     "min": 2.073,
>     "mean": 2.12423
>    },
>    "qNaN": {
>     "duration": 5.09004e+09,
>     "iterations": 9.45428e+08,
>     "max": 3306.93,
>     "min": 5.138,
>     "mean": 5.38385
>    },
>    "sNaN": {
>     "duration": 5.08458e+09,
>     "iterations": 1.15959e+09,
>     "max": 972.008,
>     "min": 3.321,
>     "mean": 4.3848
>    }
>   }
> 
> And:
> 
> $ ./testrun.sh benchtests/bench-fmin
>   "fmin": {
>    "": {
>     "duration": 5.06817e+09,
>     "iterations": 2.3913e+09,
>     "max": 1177.9,
>     "min": 2.073,
>     "mean": 2.11942
>    },
>    "qNaN": {
>     "duration": 5.08857e+09,
>     "iterations": 9.45656e+08,
>     "max": 2658.83,
>     "min": 5.09,
>     "mean": 5.38099
>    },
>    "sNaN": {
>     "duration": 5.08093e+09,
>     "iterations": 1.16725e+09,
>     "max": 1030.74,
>     "min": 3.323,
>     "mean": 4.3529
>    }
>   }
> 
> Both were run with GCC 5.4 (ubuntu 16 default installation) using default
> compiler flags on POWER8E 3.4GHz (powerpc64le-linux-gnu).
> ---
>  ChangeLog                              | 10 +++++
>  sysdeps/powerpc/fpu/s_fmax.S           | 77 ----------------------------------
>  sysdeps/powerpc/fpu/s_fmaxf.S          |  1 -
>  sysdeps/powerpc/fpu/s_fmin.S           | 77 ----------------------------------
>  sysdeps/powerpc/fpu/s_fminf.S          |  1 -
>  sysdeps/powerpc/powerpc32/fpu/s_fmax.S |  5 ---
>  sysdeps/powerpc/powerpc32/fpu/s_fmin.S |  5 ---
>  sysdeps/powerpc/powerpc64/fpu/s_fmax.S |  5 ---
>  sysdeps/powerpc/powerpc64/fpu/s_fmin.S |  5 ---
>  9 files changed, 10 insertions(+), 176 deletions(-)
>  delete mode 100644 sysdeps/powerpc/fpu/s_fmax.S
>  delete mode 100644 sysdeps/powerpc/fpu/s_fmaxf.S
>  delete mode 100644 sysdeps/powerpc/fpu/s_fmin.S
>  delete mode 100644 sysdeps/powerpc/fpu/s_fminf.S
>  delete mode 100644 sysdeps/powerpc/powerpc32/fpu/s_fmax.S
>  delete mode 100644 sysdeps/powerpc/powerpc32/fpu/s_fmin.S
>  delete mode 100644 sysdeps/powerpc/powerpc64/fpu/s_fmax.S
>  delete mode 100644 sysdeps/powerpc/powerpc64/fpu/s_fmin.S
> 
> diff --git a/sysdeps/powerpc/fpu/s_fmax.S b/sysdeps/powerpc/fpu/s_fmax.S
> deleted file mode 100644
> index e6405c0..0000000
> --- a/sysdeps/powerpc/fpu/s_fmax.S
> +++ /dev/null
> @@ -1,77 +0,0 @@
> -/* Floating-point maximum.  PowerPC version.
> -   Copyright (C) 1997-2016 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -#include <sysdep.h>
> -
> -ENTRY(__fmax)
> -/* double [f1] fmax (double [f1] x, double [f2] y); */
> -	fcmpu	cr0,fp1,fp2
> -	blt	cr0,0f		/* if x < y, neither x nor y can be NaN... */
> -	bnulr+	cr0
> -/* x and y are unordered, so one of x or y must be a NaN... */
> -	fcmpu	cr1,fp2,fp2
> -	bun	cr1,1f
> -/* x is a NaN; y is not.  Test if x is signaling.  */
> -#ifdef __powerpc64__
> -	stfd	fp1,-8(r1)
> -	lwz	r3,-8+HIWORD(r1)
> -#else
> -	stwu	r1,-16(r1)
> -	cfi_adjust_cfa_offset (16)
> -	stfd	fp1,8(r1)
> -	lwz	r3,8+HIWORD(r1)
> -	addi	r1,r1,16
> -	cfi_adjust_cfa_offset (-16)
> -#endif
> -	andis.	r3,r3,8
> -	bne	cr0,0f
> -	b	2f
> -1:	/* y is a NaN; x may or may not be.  */
> -	fcmpu	cr1,fp1,fp1
> -	bun	cr1,2f
> -/* y is a NaN; x is not.  Test if y is signaling.  */
> -#ifdef __powerpc64__
> -	stfd	fp2,-8(r1)
> -	lwz	r3,-8+HIWORD(r1)
> -#else
> -	stwu	r1,-16(r1)
> -	cfi_adjust_cfa_offset (16)
> -	stfd	fp2,8(r1)
> -	lwz	r3,8+HIWORD(r1)
> -	addi	r1,r1,16
> -	cfi_adjust_cfa_offset (-16)
> -#endif
> -	andis.	r3,r3,8
> -	bnelr	cr0
> -2:	/* x and y are NaNs, or one is a signaling NaN.  */
> -	fadd	fp1,fp1,fp2
> -	blr
> -0:	fmr	fp1,fp2
> -	blr
> -END(__fmax)
> -
> -weak_alias (__fmax,fmax)
> -
> -/* It turns out that it's safe to use this code even for single-precision.  */
> -strong_alias(__fmax,__fmaxf)
> -weak_alias (__fmax,fmaxf)
> -
> -#ifdef NO_LONG_DOUBLE
> -weak_alias (__fmax,__fmaxl)
> -weak_alias (__fmax,fmaxl)
> -#endif
> diff --git a/sysdeps/powerpc/fpu/s_fmaxf.S b/sysdeps/powerpc/fpu/s_fmaxf.S
> deleted file mode 100644
> index 3c2d62b..0000000
> --- a/sysdeps/powerpc/fpu/s_fmaxf.S
> +++ /dev/null
> @@ -1 +0,0 @@
> -/* __fmaxf is in s_fmax.c  */
> diff --git a/sysdeps/powerpc/fpu/s_fmin.S b/sysdeps/powerpc/fpu/s_fmin.S
> deleted file mode 100644
> index 9ae77fe..0000000
> --- a/sysdeps/powerpc/fpu/s_fmin.S
> +++ /dev/null
> @@ -1,77 +0,0 @@
> -/* Floating-point minimum.  PowerPC version.
> -   Copyright (C) 1997-2016 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -#include <sysdep.h>
> -
> -ENTRY(__fmin)
> -/* double [f1] fmin (double [f1] x, double [f2] y); */
> -	fcmpu	cr0,fp1,fp2
> -	bgt	cr0,0f		/* if x > y, neither x nor y can be NaN... */
> -	bnulr+	cr0
> -/* x and y are unordered, so one of x or y must be a NaN... */
> -	fcmpu	cr1,fp2,fp2
> -	bun	cr1,1f
> -/* x is a NaN; y is not.  Test if x is signaling.  */
> -#ifdef __powerpc64__
> -	stfd	fp1,-8(r1)
> -	lwz	r3,-8+HIWORD(r1)
> -#else
> -	stwu	r1,-16(r1)
> -	cfi_adjust_cfa_offset (16)
> -	stfd	fp1,8(r1)
> -	lwz	r3,8+HIWORD(r1)
> -	addi	r1,r1,16
> -	cfi_adjust_cfa_offset (-16)
> -#endif
> -	andis.	r3,r3,8
> -	bne	cr0,0f
> -	b	2f
> -1:	/* y is a NaN; x may or may not be.  */
> -	fcmpu	cr1,fp1,fp1
> -	bun	cr1,2f
> -/* y is a NaN; x is not.  Test if y is signaling.  */
> -#ifdef __powerpc64__
> -	stfd	fp2,-8(r1)
> -	lwz	r3,-8+HIWORD(r1)
> -#else
> -	stwu	r1,-16(r1)
> -	cfi_adjust_cfa_offset (16)
> -	stfd	fp2,8(r1)
> -	lwz	r3,8+HIWORD(r1)
> -	addi	r1,r1,16
> -	cfi_adjust_cfa_offset (-16)
> -#endif
> -	andis.	r3,r3,8
> -	bnelr	cr0
> -2:	/* x and y are NaNs, or one is a signaling NaN.  */
> -	fadd	fp1,fp1,fp2
> -	blr
> -0:	fmr	fp1,fp2
> -	blr
> -END(__fmin)
> -
> -weak_alias (__fmin,fmin)
> -
> -/* It turns out that it's safe to use this code even for single-precision.  */
> -strong_alias(__fmin,__fminf)
> -weak_alias (__fmin,fminf)
> -
> -#ifdef NO_LONG_DOUBLE
> -weak_alias (__fmin,__fminl)
> -weak_alias (__fmin,fminl)
> -#endif
> diff --git a/sysdeps/powerpc/fpu/s_fminf.S b/sysdeps/powerpc/fpu/s_fminf.S
> deleted file mode 100644
> index 10ab7fe..0000000
> --- a/sysdeps/powerpc/fpu/s_fminf.S
> +++ /dev/null
> @@ -1 +0,0 @@
> -/* __fminf is in s_fmin.c  */
> diff --git a/sysdeps/powerpc/powerpc32/fpu/s_fmax.S b/sysdeps/powerpc/powerpc32/fpu/s_fmax.S
> deleted file mode 100644
> index 6973576..0000000
> --- a/sysdeps/powerpc/powerpc32/fpu/s_fmax.S
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#include <math_ldbl_opt.h>
> -#include <sysdeps/powerpc/fpu/s_fmax.S>
> -#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
> -compat_symbol (libm, __fmax, fmaxl, GLIBC_2_1)
> -#endif
> diff --git a/sysdeps/powerpc/powerpc32/fpu/s_fmin.S b/sysdeps/powerpc/powerpc32/fpu/s_fmin.S
> deleted file mode 100644
> index 6d4a0a9..0000000
> --- a/sysdeps/powerpc/powerpc32/fpu/s_fmin.S
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#include <math_ldbl_opt.h>
> -#include <sysdeps/powerpc/fpu/s_fmin.S>
> -#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
> -compat_symbol (libm, __fmin, fminl, GLIBC_2_1)
> -#endif
> diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fmax.S b/sysdeps/powerpc/powerpc64/fpu/s_fmax.S
> deleted file mode 100644
> index 6973576..0000000
> --- a/sysdeps/powerpc/powerpc64/fpu/s_fmax.S
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#include <math_ldbl_opt.h>
> -#include <sysdeps/powerpc/fpu/s_fmax.S>
> -#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
> -compat_symbol (libm, __fmax, fmaxl, GLIBC_2_1)
> -#endif
> diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fmin.S b/sysdeps/powerpc/powerpc64/fpu/s_fmin.S
> deleted file mode 100644
> index 6d4a0a9..0000000
> --- a/sysdeps/powerpc/powerpc64/fpu/s_fmin.S
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#include <math_ldbl_opt.h>
> -#include <sysdeps/powerpc/fpu/s_fmin.S>
> -#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
> -compat_symbol (libm, __fmin, fminl, GLIBC_2_1)
> -#endif
>
References:
- [PATCH 1/4] Adjust benchtests to new support library.
  - From: Adhemerval Zanella
- [PATCH 4/4] powerpc: Remove f{max,min}{f} assembly implementations
  - From: Adhemerval Zanella
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]