This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] [x86_32] Don't use SSE4_2 instructions on Intel Silvermont Micro Architecture.


On 06/30/2013 03:57 PM, Liubov Dmitrieva wrote:
> Same patch as recently committed but for 32 bit.
> Attached performance results for current bench glibc test suite.
> Hopefully I caught a moment before the freeze of 2.18.

To make life easier for the reviewer, could you please summarize
the performance differences?

> 2013-06-30  Liubov Dmitrieva  <liubov.dmitrieva@intel.com>
> 
>   * sysdeps/i386/i686/multiarch/memcmp.S: Skip SSE4_2
>   version if bit_Slow_SSE4_2 is set.
>   * sysdeps/i386/i686/multiarch/strcmp.S: Likewise.
>   * sysdeps/i386/i686/multiarch/strncase.S: Likewise.
>   * sysdeps/i386/i686/multiarch/strcasecmp.S: Likewise.
>   * sysdeps/i386/i686/multiarch/wmemcmp.S: Likewise.
> 
> --
> Liubov
> 
> 
> silvermont2.patch
> 
> 
> diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S
> index 73d1363..8f4b38e 100644
> --- a/sysdeps/i386/i686/multiarch/memcmp.S
> +++ b/sysdeps/i386/i686/multiarch/memcmp.S
> @@ -40,6 +40,8 @@ ENTRY(memcmp)
>  	leal	__memcmp_ssse3@GOTOFF(%ebx), %eax
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> +	jnz	2f
>  	leal	__memcmp_sse4_2@GOTOFF(%ebx), %eax
>  2:	popl	%ebx
>  	cfi_adjust_cfa_offset (-4)

OK.

> @@ -59,6 +61,8 @@ ENTRY(memcmp)
>  	leal	__memcmp_ssse3, %eax
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> +	jnz	2f
>  	leal	__memcmp_sse4_2, %eax
>  2:	ret
>  END(memcmp)

OK.

> diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
> index 3b38214..79a154e 100644
> --- a/sysdeps/i386/i686/multiarch/strcasecmp.S
> +++ b/sysdeps/i386/i686/multiarch/strcasecmp.S
> @@ -37,6 +37,8 @@ ENTRY(__strcasecmp)
>  	leal	__strcasecmp_ssse3@GOTOFF(%ebx), %eax
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> +	jnz	2f
>  	leal	__strcasecmp_sse4_2@GOTOFF(%ebx), %eax
>  2:	popl	%ebx
>  	cfi_adjust_cfa_offset (-4)

OK.

> @@ -58,6 +60,8 @@ ENTRY(__strcasecmp)
>  	// XXX Temporarily
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> +	jnz	2f
>  	leal	__strcasecmp_sse4_2, %eax
>  #endif
>  2:	ret

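You're adding code to a block that is currently preprocessed out
(the "XXX Temporarily" section ending at the #endif), which is OK,
but I'd like to know why it's disabled.

Could you please find the history behind this, i.e. when and why
the block was disabled?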

> diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
> index 7dc2cef..41dd3b3 100644
> --- a/sysdeps/i386/i686/multiarch/strcmp.S
> +++ b/sysdeps/i386/i686/multiarch/strcmp.S
> @@ -68,6 +68,8 @@ ENTRY(STRCMP)
>  	leal	__STRCMP_SSSE3@GOTOFF(%ebx), %eax
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> +	jnz	2f
>  	leal	__STRCMP_SSE4_2@GOTOFF(%ebx), %eax
>  2:	popl	%ebx
>  	cfi_adjust_cfa_offset (-4)

OK.

> @@ -87,6 +89,8 @@ ENTRY(STRCMP)
>  	leal	__STRCMP_SSSE3, %eax
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> +	jnz	2f
>  	leal	__STRCMP_SSE4_2, %eax
>  2:	ret
>  END(STRCMP)

OK.

> diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
> index 51c6d72..4045f71 100644
> --- a/sysdeps/i386/i686/multiarch/strncase.S
> +++ b/sysdeps/i386/i686/multiarch/strncase.S
> @@ -37,6 +37,8 @@ ENTRY(__strncasecmp)
>  	leal	__strncasecmp_ssse3@GOTOFF(%ebx), %eax
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> +	jnz	2f
>  	leal	__strncasecmp_sse4_2@GOTOFF(%ebx), %eax
>  2:	popl	%ebx
>  	cfi_adjust_cfa_offset (-4)

OK.

> @@ -58,6 +60,8 @@ ENTRY(__strncasecmp)
>  	// XXX Temporarily
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
> +	jnz	2f
>  	leal	__strncasecmp_sse4_2, %eax
>  #endif
>  2:	ret

This is the same kind of preprocessed-out ("XXX Temporarily") block
as in strcasecmp.S above. The change is OK, but I'd like to know why
the block is disabled.

> diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S
> index e994038..e685a9f 100644
> --- a/sysdeps/i386/i686/multiarch/wmemcmp.S
> +++ b/sysdeps/i386/i686/multiarch/wmemcmp.S
> @@ -40,6 +40,8 @@ ENTRY(wmemcmp)
>  	leal	__wmemcmp_ssse3@GOTOFF(%ebx), %eax
>  	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
>  	jz	2f
> +	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
> +	jnz	2f
>  	leal	__wmemcmp_sse4_2@GOTOFF(%ebx), %eax
>  2:	popl	%ebx
>  	cfi_adjust_cfa_offset (-4)

OK.

Please post a v2 that includes a summary of the performance
differences and the history behind the disabled blocks.

Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]