[patch, ARM] Enable auto-detection of vector size for NEON

Thu Mar 24 11:33:00 GMT 2011

On 24/03/11 09:06, Ira Rosen wrote:
> Hi,
>
> This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.
>
> Regtested on arm-linux-gnueabi.
> OK for trunk?
>

This is OK for trunk if there are no regressions.

cheers
Ramana

> Thanks,
> Ira
>
> ChangeLog:
>
> 	* config/arm/arm.c (arm_autovectorize_vector_sizes): New
> 	function.
> 	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
>
> testsuite/ChangeLog:
>
> 	* gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
> 	accesses to preserve the meaning of the test for doubleword vectors.
> 	* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
> 	* gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.
>
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 171339)
> +++ config/arm/arm.c    (working copy)
> @@ -252,6 +252,7 @@ static bool arm_builtin_support_vector_misalignmen
>                                                       bool is_packed);
>   static void arm_conditional_register_usage (void);
>   static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
> +static unsigned int arm_autovectorize_vector_sizes (void);
>
>   ^L
>   /* Table of machine attributes.  */
> @@ -404,6 +405,9 @@ static const struct default_options arm_option_opt
>   #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
>   #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
>   #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
> +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
> +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
> +  arm_autovectorize_vector_sizes
>
>   #undef  TARGET_MACHINE_DEPENDENT_REORG
>   #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
> @@ -23528,6 +23532,12 @@ arm_expand_sync (enum machine_mode mode,
>       }
>   }
>
> +static unsigned int
> +arm_autovectorize_vector_sizes (void)
> +{
> +  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
> +}
> +
>   static bool
>   arm_vector_alignment_reachable (const_tree type, bool is_packed)
>   {
> Index: testsuite/gcc.dg/vect/vect-outer-5.c
> ===================================================================
> --- testsuite/gcc.dg/vect/vect-outer-5.c        (revision 171339)
> +++ testsuite/gcc.dg/vect/vect-outer-5.c        (working copy)
> @@ -17,7 +17,7 @@ int main1 ()
>     float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>     float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>     float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> -  float E[4] = {0,1,2,480};
> +  float E[4] = {0,480,960,1440};
>     float s;
>
>     int i, j;
> @@ -55,7 +55,7 @@ int main1 ()
>         s = 0;
>         for (j=0; j<N; j+=4)
>          s += C[j];
> -      B[i+3] = B[i] + s;
> +      B[i+1] = B[i] + s;
>       }
>
>     /* check results:  */
> Index: testsuite/gcc.dg/vect/slp-3.c
> ===================================================================
> --- testsuite/gcc.dg/vect/slp-3.c       (revision 171339)
> +++ testsuite/gcc.dg/vect/slp-3.c       (working copy)
> @@ -101,7 +101,7 @@ main1 ()
>       }
>
>     /* SLP with unrolling by 8.  */
> -  for (i = 0; i<  N/2; i++)
> +  for (i = 0; i<  N/4; i++)
>       {
>         out[i*9] = in[i*9];
>         out[i*9 + 1] = in[i*9 + 1];
> @@ -115,7 +115,7 @@ main1 ()
>       }
>
>     /* check results:  */
> -  for (i = 0; i<  N/2; i++)
> +  for (i = 0; i<  N/4; i++)
>       {
>         if (out[i*9] !=  in[i*9]
>            || out[i*9 + 1] != in[i*9 + 1]
> Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c
> ===================================================================
> --- testsuite/gcc.dg/vect/no-vfa-pr29145.c      (revision 171339)
> +++ testsuite/gcc.dg/vect/no-vfa-pr29145.c      (working copy)
> @@ -8,7 +8,7 @@ __attribute__ ((noinline))
>   void with_restrict(int * __restrict p)
>   {
>     int i;
> -  int *q = p - 2;
> +  int *q = p - 1;
>
>     for (i = 0; i<  1000; ++i) {
>       p[i] = q[i];
> @@ -19,7 +19,7 @@ __attribute__ ((noinline))
>   void without_restrict(int * p)
>   {
>     int i;
> -  int *q = p - 2;
> +  int *q = p - 1;
>
>     for (i = 0; i<  1000; ++i) {
>       p[i] = q[i];
> @@ -38,8 +38,8 @@ int main(void)
>       a[i] = b[i] = i;
>     }
>
> -  with_restrict(a + 2);
> -  without_restrict(b + 2);
> +  with_restrict(a + 1);
> +  without_restrict(b + 1);
>
>     for (i = 0; i<  1002; ++i) {
>       if (a[i] != b[i])