GNU C Library master sources branch, master, updated. glibc-2.12-121-g73f27d5

drepper@sourceware.org drepper@sourceware.org
Tue Aug 24 18:36:00 GMT 2010


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master, has been updated
       via  73f27d5e722ece05a66c124406cc8ca4305f4cbd (commit)
      from  84b9230c404aed4fd3a7bb3d045ca367043dde8c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=73f27d5e722ece05a66c124406cc8ca4305f4cbd

commit 73f27d5e722ece05a66c124406cc8ca4305f4cbd
Author: Richard Henderson <rth@redhat.com>
Date:   Tue Aug 24 11:35:01 2010 -0700

    Clean up SSE variable shifts

diff --git a/ChangeLog b/ChangeLog
index 1da347c..f8050d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2010-08-24  Richard Henderson  <rth@redhat.com>
+	    Ulrich Drepper  <drepper@redhat.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add varshift.
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Likewise.
+	* sysdeps/x86_64/multiarch/strcspn-c.c: Include "varshift.h".
+	Replace _mm_srli_si128 with __m128i_shift_right.  Replace
+	_mm_alignr_epi8 with _mm_loadu_si128.
+	* sysdeps/x86_64/multiarch/strspn-c.c: Likewise.
+	* sysdeps/x86_64/multiarch/strstr.c: Include "varshift.h".
+	(__m128i_shift_right): Removed.
+	* sysdeps/i386/i686/multiarch/varshift.h: New file.
+	* sysdeps/i386/i686/multiarch/varshift.S: New file.
+	* sysdeps/x86_64/multiarch/varshift.h: New file.
+	* sysdeps/x86_64/multiarch/varshift.S: New file.
+
 2010-08-21  Mike Frysinger  <vapier@gentoo.org>
 
 	* configure.in: Move assembler checks to before sysdep dir checking.
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 12bcfc2..26f3e58 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -9,7 +9,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
 		   memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
 		   strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
-		   memcmp-ssse3 memcmp-sse4 strcasestr-nonascii
+		   memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-strcspn-c.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/varshift.S b/sysdeps/i386/i686/multiarch/varshift.S
new file mode 100644
index 0000000..41afaf7
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/varshift.S
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/varshift.S>
diff --git a/sysdeps/i386/i686/multiarch/varshift.h b/sysdeps/i386/i686/multiarch/varshift.h
new file mode 100644
index 0000000..7c72c70
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/varshift.h
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/varshift.h>
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index b124524..27dc563 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -10,7 +10,7 @@ sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
 		   strncase_l-ssse3
 ifeq (yes,$(config-cflags-sse4))
-sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
+sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-strcspn-c.c += -msse4
 CFLAGS-strpbrk-c.c += -msse4
 CFLAGS-strspn-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index daeebe1..04aba46 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -20,6 +20,7 @@
 
 #include <nmmintrin.h>
 #include <string.h>
+#include "varshift.h"
 
 /* We use 0x2:
 	_SIDD_SBYTE_OPS
@@ -86,8 +87,6 @@ STRCSPN_SSE42 (const char *s, const char *a)
 
   const char *aligned;
   __m128i mask;
-  /* Fake initialization.  gcc otherwise will warn.  */
-  asm ("" : "=xm" (mask));
   int offset = (int) ((size_t) a & 15);
   if (offset != 0)
     {
@@ -95,54 +94,7 @@ STRCSPN_SSE42 (const char *s, const char *a)
       aligned = (const char *) ((size_t) a & -16L);
       __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
 
-      switch (offset)
-	{
-	case 1:
-	  mask = _mm_srli_si128 (mask0, 1);
-	  break;
-	case 2:
-	  mask = _mm_srli_si128 (mask0, 2);
-	  break;
-	case 3:
-	  mask = _mm_srli_si128 (mask0, 3);
-	  break;
-	case 4:
-	  mask = _mm_srli_si128 (mask0, 4);
-	  break;
-	case 5:
-	  mask = _mm_srli_si128 (mask0, 5);
-	  break;
-	case 6:
-	  mask = _mm_srli_si128 (mask0, 6);
-	  break;
-	case 7:
-	  mask = _mm_srli_si128 (mask0, 7);
-	  break;
-	case 8:
-	  mask = _mm_srli_si128 (mask0, 8);
-	  break;
-	case 9:
-	  mask = _mm_srli_si128 (mask0, 9);
-	  break;
-	case 10:
-	  mask = _mm_srli_si128 (mask0, 10);
-	  break;
-	case 11:
-	  mask = _mm_srli_si128 (mask0, 11);
-	  break;
-	case 12:
-	  mask = _mm_srli_si128 (mask0, 12);
-	  break;
-	case 13:
-	  mask = _mm_srli_si128 (mask0, 13);
-	  break;
-	case 14:
-	  mask = _mm_srli_si128 (mask0, 14);
-	  break;
-	case 15:
-	  mask = _mm_srli_si128 (mask0, 15);
-	  break;
-	}
+      mask = __m128i_shift_right (mask0, offset);
 
       /* Find where the NULL terminator is.  */
       int length = _mm_cmpistri (mask, mask, 0x3a);
@@ -159,55 +111,10 @@ STRCSPN_SSE42 (const char *s, const char *a)
 
 	  if (index != 0)
 	    {
-	      /* Combine mask0 and mask1.  */
-	      switch (offset)
-		{
-		case 1:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 1);
-		  break;
-		case 2:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 2);
-		  break;
-		case 3:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 3);
-		  break;
-		case 4:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 4);
-		  break;
-		case 5:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 5);
-		  break;
-		case 6:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 6);
-		  break;
-		case 7:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 7);
-		  break;
-		case 8:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 8);
-		  break;
-		case 9:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 9);
-		  break;
-		case 10:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 10);
-		  break;
-		case 11:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 11);
-		  break;
-		case 12:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 12);
-		  break;
-		case 13:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 13);
-		  break;
-		case 14:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 14);
-		  break;
-		case 15:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 15);
-		  break;
-		}
+	      /* Combine mask0 and mask1.  We could play games with
+		 palignr, but frankly this data should be in L1 now
+		 so do the merge via an unaligned load.  */
+	      mask = _mm_loadu_si128 ((__m128i *) a);
 	    }
 	}
     }
@@ -234,54 +141,7 @@ STRCSPN_SSE42 (const char *s, const char *a)
       aligned = (const char *) ((size_t) s & -16L);
       __m128i value = _mm_load_si128 ((__m128i *) aligned);
 
-      switch (offset)
-	{
-	case 1:
-	  value = _mm_srli_si128 (value, 1);
-	  break;
-	case 2:
-	  value = _mm_srli_si128 (value, 2);
-	  break;
-	case 3:
-	  value = _mm_srli_si128 (value, 3);
-	  break;
-	case 4:
-	  value = _mm_srli_si128 (value, 4);
-	  break;
-	case 5:
-	  value = _mm_srli_si128 (value, 5);
-	  break;
-	case 6:
-	  value = _mm_srli_si128 (value, 6);
-	  break;
-	case 7:
-	  value = _mm_srli_si128 (value, 7);
-	  break;
-	case 8:
-	  value = _mm_srli_si128 (value, 8);
-	  break;
-	case 9:
-	  value = _mm_srli_si128 (value, 9);
-	  break;
-	case 10:
-	  value = _mm_srli_si128 (value, 10);
-	  break;
-	case 11:
-	  value = _mm_srli_si128 (value, 11);
-	  break;
-	case 12:
-	  value = _mm_srli_si128 (value, 12);
-	  break;
-	case 13:
-	  value = _mm_srli_si128 (value, 13);
-	  break;
-	case 14:
-	  value = _mm_srli_si128 (value, 14);
-	  break;
-	case 15:
-	  value = _mm_srli_si128 (value, 15);
-	  break;
-	}
+      value = __m128i_shift_right (value, offset);
 
       int length = _mm_cmpistri (mask, value, 0x2);
       /* No need to check ZFlag since ZFlag is always 1.  */
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index be9e8ac..ab58549 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -20,6 +20,7 @@
 
 #include <nmmintrin.h>
 #include <string.h>
+#include "varshift.h"
 
 /* We use 0x12:
 	_SIDD_SBYTE_OPS
@@ -71,54 +72,7 @@ __strspn_sse42 (const char *s, const char *a)
       aligned = (const char *) ((size_t) a & -16L);
       __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
 
-      switch (offset)
-	{
-	case 1:
-	  mask = _mm_srli_si128 (mask0, 1);
-	  break;
-	case 2:
-	  mask = _mm_srli_si128 (mask0, 2);
-	  break;
-	case 3:
-	  mask = _mm_srli_si128 (mask0, 3);
-	  break;
-	case 4:
-	  mask = _mm_srli_si128 (mask0, 4);
-	  break;
-	case 5:
-	  mask = _mm_srli_si128 (mask0, 5);
-	  break;
-	case 6:
-	  mask = _mm_srli_si128 (mask0, 6);
-	  break;
-	case 7:
-	  mask = _mm_srli_si128 (mask0, 7);
-	  break;
-	case 8:
-	  mask = _mm_srli_si128 (mask0, 8);
-	  break;
-	case 9:
-	  mask = _mm_srli_si128 (mask0, 9);
-	  break;
-	case 10:
-	  mask = _mm_srli_si128 (mask0, 10);
-	  break;
-	case 11:
-	  mask = _mm_srli_si128 (mask0, 11);
-	  break;
-	case 12:
-	  mask = _mm_srli_si128 (mask0, 12);
-	  break;
-	case 13:
-	  mask = _mm_srli_si128 (mask0, 13);
-	  break;
-	case 14:
-	  mask = _mm_srli_si128 (mask0, 14);
-	  break;
-	case 15:
-	  mask = _mm_srli_si128 (mask0, 15);
-	  break;
-	}
+      mask = __m128i_shift_right (mask0, offset);
 
       /* Find where the NULL terminator is.  */
       int length = _mm_cmpistri (mask, mask, 0x3a);
@@ -135,55 +89,10 @@ __strspn_sse42 (const char *s, const char *a)
 
 	  if (index != 0)
 	    {
-	      /* Combine mask0 and mask1.  */
-	      switch (offset)
-		{
-		case 1:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 1);
-		  break;
-		case 2:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 2);
-		  break;
-		case 3:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 3);
-		  break;
-		case 4:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 4);
-		  break;
-		case 5:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 5);
-		  break;
-		case 6:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 6);
-		  break;
-		case 7:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 7);
-		  break;
-		case 8:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 8);
-		  break;
-		case 9:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 9);
-		  break;
-		case 10:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 10);
-		  break;
-		case 11:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 11);
-		  break;
-		case 12:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 12);
-		  break;
-		case 13:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 13);
-		  break;
-		case 14:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 14);
-		  break;
-		case 15:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 15);
-		  break;
-		}
+	      /* Combine mask0 and mask1.  We could play games with
+		 palignr, but frankly this data should be in L1 now
+		 so do the merge via an unaligned load.  */
+	      mask = _mm_loadu_si128 ((__m128i *) a);
 	    }
 	}
     }
@@ -210,54 +119,7 @@ __strspn_sse42 (const char *s, const char *a)
       aligned = (const char *) ((size_t) s & -16L);
       __m128i value = _mm_load_si128 ((__m128i *) aligned);
 
-      switch (offset)
-	{
-	case 1:
-	  value = _mm_srli_si128 (value, 1);
-	  break;
-	case 2:
-	  value = _mm_srli_si128 (value, 2);
-	  break;
-	case 3:
-	  value = _mm_srli_si128 (value, 3);
-	  break;
-	case 4:
-	  value = _mm_srli_si128 (value, 4);
-	  break;
-	case 5:
-	  value = _mm_srli_si128 (value, 5);
-	  break;
-	case 6:
-	  value = _mm_srli_si128 (value, 6);
-	  break;
-	case 7:
-	  value = _mm_srli_si128 (value, 7);
-	  break;
-	case 8:
-	  value = _mm_srli_si128 (value, 8);
-	  break;
-	case 9:
-	  value = _mm_srli_si128 (value, 9);
-	  break;
-	case 10:
-	  value = _mm_srli_si128 (value, 10);
-	  break;
-	case 11:
-	  value = _mm_srli_si128 (value, 11);
-	  break;
-	case 12:
-	  value = _mm_srli_si128 (value, 12);
-	  break;
-	case 13:
-	  value = _mm_srli_si128 (value, 13);
-	  break;
-	case 14:
-	  value = _mm_srli_si128 (value, 14);
-	  break;
-	case 15:
-	  value = _mm_srli_si128 (value, 15);
-	  break;
-	}
+      value = __m128i_shift_right (value, offset);
 
       int length = _mm_cmpistri (mask, value, 0x12);
       /* No need to check CFlag since it is always 1.  */
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 45d7a55..b408b75 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -19,6 +19,7 @@
    02111-1307 USA.  */
 
 #include <nmmintrin.h>
+#include "varshift.h"
 
 #ifndef STRSTR_SSE42
 # define STRSTR_SSE42 __strstr_sse42
@@ -82,67 +83,6 @@
    5.  failed string compare, go back to scanning
  */
 
-/* Fix-up of removal of unneeded data due to 16B aligned load
-   parameters:
-     value: 16B data loaded from 16B aligned address.
-     offset: Offset of target data address relative to 16B aligned load
-	     address.
- */
-
-static __inline__ __m128i
-__m128i_shift_right (__m128i value, int offset)
-{
-  switch (offset)
-    {
-    case 1:
-      value = _mm_srli_si128 (value, 1);
-      break;
-    case 2:
-      value = _mm_srli_si128 (value, 2);
-      break;
-    case 3:
-      value = _mm_srli_si128 (value, 3);
-      break;
-    case 4:
-      value = _mm_srli_si128 (value, 4);
-      break;
-    case 5:
-      value = _mm_srli_si128 (value, 5);
-      break;
-    case 6:
-      value = _mm_srli_si128 (value, 6);
-      break;
-    case 7:
-      value = _mm_srli_si128 (value, 7);
-      break;
-    case 8:
-      value = _mm_srli_si128 (value, 8);
-      break;
-    case 9:
-      value = _mm_srli_si128 (value, 9);
-      break;
-    case 10:
-      value = _mm_srli_si128 (value, 10);
-      break;
-    case 11:
-      value = _mm_srli_si128 (value, 11);
-      break;
-    case 12:
-      value = _mm_srli_si128 (value, 12);
-      break;
-    case 13:
-      value = _mm_srli_si128 (value, 13);
-      break;
-    case 14:
-      value = _mm_srli_si128 (value, 14);
-      break;
-    case 15:
-      value = _mm_srli_si128 (value, 15);
-      break;
-    }
-  return value;
-}
-
 /* Simple replacement of movdqu to address 4KB boundary cross issue.
    If EOS occurs within less than 16B before 4KB boundary, we don't
    cross to next page.  */
diff --git a/sysdeps/x86_64/multiarch/varshift.S b/sysdeps/x86_64/multiarch/varshift.S
new file mode 100644
index 0000000..b50f98b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/varshift.S
@@ -0,0 +1,30 @@
+/* Helper for variable shifts of SSE registers.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+
+	.section .rodata
+	.hidden	___m128i_shift_right
+	.globl	___m128i_shift_right
+	.size	___m128i_shift_right, 31	/* 16 index bytes + 15 fill bytes.  */
+/* A 16-byte load at offset N (0..15) is a pshufb mask for a right shift by N.  */
+___m128i_shift_right:
+	.byte	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15	/* Identity indices.  */
+	.byte	 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1	/* High bit set: pshufb writes 0.  */
diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
new file mode 100644
index 0000000..d679739
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/varshift.h
@@ -0,0 +1,27 @@
+/* Helper for variable shifts of SSE registers.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+extern const char ___m128i_shift_right[31] __attribute__((visibility("hidden")));
+
+/* Shift VALUE right by OFFSET (0..15) bytes, shifting in zero bytes.  */
+static __inline__ __m128i
+__m128i_shift_right (__m128i value, unsigned long offset)
+{
+  return _mm_shuffle_epi8 (value, _mm_loadu_si128 ((const __m128i *) (___m128i_shift_right + offset)));
+}

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                              |   17 ++++
 sysdeps/i386/i686/multiarch/Makefile   |    2 +-
 sysdeps/i386/i686/multiarch/varshift.S |    1 +
 sysdeps/i386/i686/multiarch/varshift.h |    1 +
 sysdeps/x86_64/multiarch/Makefile      |    2 +-
 sysdeps/x86_64/multiarch/strcspn-c.c   |  154 ++------------------------------
 sysdeps/x86_64/multiarch/strspn-c.c    |  152 ++------------------------------
 sysdeps/x86_64/multiarch/strstr.c      |   62 +-------------
 sysdeps/x86_64/multiarch/varshift.S    |   30 ++++++
 sysdeps/x86_64/multiarch/varshift.h    |   27 ++++++
 10 files changed, 93 insertions(+), 355 deletions(-)
 create mode 100644 sysdeps/i386/i686/multiarch/varshift.S
 create mode 100644 sysdeps/i386/i686/multiarch/varshift.h
 create mode 100644 sysdeps/x86_64/multiarch/varshift.S
 create mode 100644 sysdeps/x86_64/multiarch/varshift.h


hooks/post-receive
-- 
GNU C Library master sources



More information about the Glibc-cvs mailing list