This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Enable AVX_Fast_Unaligned_Load by default for Zen


On 07/06/2018 06:07 AM, Pawar, Amit wrote:
> From a6014a05c3f418c7ce6512d706e6ab6bcb818200 Mon Sep 17 00:00:00 2001
> From: Amit Pawar <Amit.Pawar@amd.com>
> Date: Fri, 6 Jul 2018 14:52:35 +0530
> Subject: [PATCH] Preferring AVX_Fast_Unaligned_Load as default from Zen.
> 
> From Zen onwards this will be enabled. It was disabled for the Excavator
> case, and that will remain unchanged.
> 
> 	* sysdeps/x86/cpu-features.c (get_common_indeces):
> 	AVX_Fast_Unaligned_Load is enabled when AVX2 is detected.
> 	* sysdeps/x86/cpu-features.c (init_cpu_features):
> 	AVX_Fast_Unaligned_Load is disabled for Excavator core.

OK to commit with the changes below.

Please remember to update the ChangeLog too.

Do you have commit access?

Reviewed-by: Carlos O'Donell <carlos@redhat.com>

> ---
>  sysdeps/x86/cpu-features.c | 18 +++++++++++++-----
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 0fc3674..6b0f56c 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -78,8 +78,15 @@ get_common_indeces (struct cpu_features *cpu_features,
>  	      /* The following features depend on AVX being usable.  */
>  	      /* Determine if AVX2 is usable.  */
>  	      if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
> +	      {
>  		cpu_features->feature[index_arch_AVX2_Usable]
>  		  |= bit_arch_AVX2_Usable;
> +
> +                /* Unaligned load with 256-bit AVX registers are faster on
> +		 * Intel/AMD processors with AVX2.  */

Drop the leading '*' on the second line, e.g.:

/* Unaligned load with 256-bit AVX registers are faster on
   Intel/AMD processors with AVX2.  */

> +		cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
> +		  |= bit_arch_AVX_Fast_Unaligned_Load;
> +	      }
>  	      /* Determine if FMA is usable.  */
>  	      if (CPU_FEATURES_CPU_P (cpu_features, FMA))
>  		cpu_features->feature[index_arch_FMA_Usable]
> @@ -298,11 +305,6 @@ init_cpu_features (struct cpu_features *cpu_features)
>  	    }
>  	}
>  
> -      /* Unaligned load with 256-bit AVX registers are faster on
> -	 Intel processors with AVX2.  */
> -      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
> -	cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
> -	  |= bit_arch_AVX_Fast_Unaligned_Load;
>  
>        /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
>           if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
> @@ -351,9 +353,15 @@ init_cpu_features (struct cpu_features *cpu_features)
>  #endif
>  	  /* "Excavator"   */
>  	  if (model >= 0x60 && model <= 0x7f)
> +	  {
>  	    cpu_features->feature[index_arch_Fast_Unaligned_Load]
>  	      |= (bit_arch_Fast_Unaligned_Load
>  		  | bit_arch_Fast_Copy_Backward);
> +
> +	    /* Unaligned AVX loads are slower.*/
> +	    cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
> +		  &= ~bit_arch_AVX_Fast_Unaligned_Load;
> +	  }
>  	}
>      }
>    else
> -- 2.7.4


-- 
Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]