[patch, ARM] Enable auto-detection of vector size for NEON
Ramana Radhakrishnan
ramana.radhakrishnan@linaro.org
Thu Mar 24 11:33:00 GMT 2011
On 24/03/11 09:06, Ira Rosen wrote:
> Hi,
>
> This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.
>
> Regtested on arm-linux-gnueabi.
> OK for trunk?
>
This is OK for trunk if there are no regressions.
cheers
Ramana
> Thanks,
> Ira
>
> ChangeLog:
>
> * config/arm/arm.c (arm_autovectorize_vector_sizes): New
> function.
> (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
>
> testsuite/ChangeLog:
>
> * gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
> accesses to preserve the meaning of the test for doubleword vectors.
> * gcc.dg/vect/no-vfa-pr29145.c: Likewise.
> * gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.
>
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c (revision 171339)
> +++ config/arm/arm.c (working copy)
> @@ -252,6 +252,7 @@ static bool arm_builtin_support_vector_misalignmen
> bool is_packed);
> static void arm_conditional_register_usage (void);
> static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
> +static unsigned int arm_autovectorize_vector_sizes (void);
>
> ^L
> /* Table of machine attributes. */
> @@ -404,6 +405,9 @@ static const struct default_options arm_option_opt
> #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
> #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
> #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
> +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
> +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
> + arm_autovectorize_vector_sizes
>
> #undef TARGET_MACHINE_DEPENDENT_REORG
> #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
> @@ -23528,6 +23532,12 @@ arm_expand_sync (enum machine_mode mode,
> }
> }
>
> +static unsigned int
> +arm_autovectorize_vector_sizes (void)
> +{
> + return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
> +}
> +
> static bool
> arm_vector_alignment_reachable (const_tree type, bool is_packed)
> {
> Index: testsuite/gcc.dg/vect/vect-outer-5.c
> ===================================================================
> --- testsuite/gcc.dg/vect/vect-outer-5.c (revision 171339)
> +++ testsuite/gcc.dg/vect/vect-outer-5.c (working copy)
> @@ -17,7 +17,7 @@ int main1 ()
> float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> - float E[4] = {0,1,2,480};
> + float E[4] = {0,480,960,1440};
> float s;
>
> int i, j;
> @@ -55,7 +55,7 @@ int main1 ()
> s = 0;
> for (j=0; j<N; j+=4)
> s += C[j];
> - B[i+3] = B[i] + s;
> + B[i+1] = B[i] + s;
> }
>
> /* check results: */
> Index: testsuite/gcc.dg/vect/slp-3.c
> ===================================================================
> --- testsuite/gcc.dg/vect/slp-3.c (revision 171339)
> +++ testsuite/gcc.dg/vect/slp-3.c (working copy)
> @@ -101,7 +101,7 @@ main1 ()
> }
>
> /* SLP with unrolling by 8. */
> - for (i = 0; i< N/2; i++)
> + for (i = 0; i< N/4; i++)
> {
> out[i*9] = in[i*9];
> out[i*9 + 1] = in[i*9 + 1];
> @@ -115,7 +115,7 @@ main1 ()
> }
>
> /* check results: */
> - for (i = 0; i< N/2; i++)
> + for (i = 0; i< N/4; i++)
> {
> if (out[i*9] != in[i*9]
> || out[i*9 + 1] != in[i*9 + 1]
> Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c
> ===================================================================
> --- testsuite/gcc.dg/vect/no-vfa-pr29145.c (revision 171339)
> +++ testsuite/gcc.dg/vect/no-vfa-pr29145.c (working copy)
> @@ -8,7 +8,7 @@ __attribute__ ((noinline))
> void with_restrict(int * __restrict p)
> {
> int i;
> - int *q = p - 2;
> + int *q = p - 1;
>
> for (i = 0; i< 1000; ++i) {
> p[i] = q[i];
> @@ -19,7 +19,7 @@ __attribute__ ((noinline))
> void without_restrict(int * p)
> {
> int i;
> - int *q = p - 2;
> + int *q = p - 1;
>
> for (i = 0; i< 1000; ++i) {
> p[i] = q[i];
> @@ -38,8 +38,8 @@ int main(void)
> a[i] = b[i] = i;
> }
>
> - with_restrict(a + 2);
> - without_restrict(b + 2);
> + with_restrict(a + 1);
> + without_restrict(b + 1);
>
> for (i = 0; i< 1002; ++i) {
> if (a[i] != b[i])
More information about the Gcc-patches mailing list