This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Use AVX_Fast_Unaligned_Load from Zen onwards.


On Fri, Jul 06, 2018 at 09:55:36AM -0400, Amit Pawar wrote:
> From Zen onwards, AVX_Fast_Unaligned_Load will be enabled. It was disabled
> for the Excavator case and will remain disabled there.

Wasn't it also disabled for Bulldozer and older models?

> Reviewed-by: Carlos O'Donell <carlos@redhat.com>
> ---
>  ChangeLog                  |  7 +++++++
>  sysdeps/x86/cpu-features.c | 18 +++++++++++++-----
>  2 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/ChangeLog b/ChangeLog
> index 5a1f291b85..7fe8e2463e 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,10 @@
> +2018-07-06  Amit Pawar  <amit.pawar@amd.com>
> +
> +	* sysdeps/x86/cpu-features.c (get_common_indeces):
> +	AVX_Fast_Unaligned_Load is enabled when AVX2 is detected.
> +	* sysdeps/x86/cpu-features.c (init_cpu_features):
> +	AVX_Fast_Unaligned_Load is disabled for Excavator core.
> +
>  2018-07-05  Florian Weimer  <fweimer@redhat.com>
>  
>  	* csu/Makefile (CFLAGS-static-reloc.os): Build with stack
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 0fc3674c4b..d41ebde823 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -78,8 +78,15 @@ get_common_indeces (struct cpu_features *cpu_features,
>  	      /* The following features depend on AVX being usable.  */
>  	      /* Determine if AVX2 is usable.  */
>  	      if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
> +	      {
>  		cpu_features->feature[index_arch_AVX2_Usable]
>  		  |= bit_arch_AVX2_Usable;
> +
> +	        /* Unaligned load with 256-bit AVX registers are faster on
> +	           Intel/AMD processors with AVX2.  */
> +	        cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
> +		  |= bit_arch_AVX_Fast_Unaligned_Load;
> +	      }
>  	      /* Determine if FMA is usable.  */
>  	      if (CPU_FEATURES_CPU_P (cpu_features, FMA))
>  		cpu_features->feature[index_arch_FMA_Usable]
> @@ -298,11 +305,6 @@ init_cpu_features (struct cpu_features *cpu_features)
>  	    }
>  	}
>  
> -      /* Unaligned load with 256-bit AVX registers are faster on
> -	 Intel processors with AVX2.  */
> -      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
> -	cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
> -	  |= bit_arch_AVX_Fast_Unaligned_Load;
>  
>        /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
>           if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
> @@ -351,9 +353,15 @@ init_cpu_features (struct cpu_features *cpu_features)
>  #endif
>  	  /* "Excavator"   */
>  	  if (model >= 0x60 && model <= 0x7f)
> +	  {
>  	    cpu_features->feature[index_arch_Fast_Unaligned_Load]
>  	      |= (bit_arch_Fast_Unaligned_Load
>  		  | bit_arch_Fast_Copy_Backward);
> +
> +	    /* Unaligned AVX loads are slower.*/
> +	    cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
> +		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
> +	  }
>  	}
>      }
>    else
> -- 
> 2.17.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]