This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.26.9000-922-gf18b8dc


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master, has been updated
       via  f18b8dc7d7ef3f01804e241d40f92faf480264c0 (commit)
      from  446d22e91d3113be57a4b0d1151cf337458c3bec (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=f18b8dc7d7ef3f01804e241d40f92faf480264c0

commit f18b8dc7d7ef3f01804e241d40f92faf480264c0
Author: Chris Metcalf <cmetcalf@mellanox.com>
Date:   Tue Dec 5 10:24:56 2017 -0500

    tilegx: work around vector insn bug in gcc
    
    Avoid an issue in gcc where some of the vector (aka SIMD) ops will
    sometimes end up getting wrongly optimized out.  We use these
    instructions in many of the string implementations.  If/when we
    have an upstreamed fix for this problem in gcc we can conditionalize
    the use of the extended assembly workaround in glibc.

diff --git a/ChangeLog b/ChangeLog
index 333012b..ded6845 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2017-12-05  Chris Metcalf  <cmetcalf@mellanox.com>
+
+	* sysdeps/tile/tilegx/string-endian.h (VECOP): Provide working
+	replacements for __insn_xxx builtins for v1cmpeq, v1cmpltu,
+	v1cmpne, v1add, v1shru, v1shl (register and immediate versions).
+	* sysdeps/tile/tilegx/memchr.c (__memchr): Use VECOP function
+	instead of __insn_xxx.
+	* sysdeps/tile/tilegx/rawmemchr.c (__rawmemchr): Likewise.
+	* sysdeps/tile/tilegx/strstr.c (strcasechr): Likewise.
+	* sysdeps/tile/tilegx/strrchr.c (strrchr): Likewise.
+	* sysdeps/tile/tilegx/strlen.c (strlen): Likewise.
+	* sysdeps/tile/tilegx/strchrnul.c (__strchrnul): Likewise.
+	* sysdeps/tile/tilegx/strchr.c (strchr): Likewise.
+
 2017-12-05  Florian Weimer  <fweimer@redhat.com>
 
 	Linux: Implement interfaces for memory protection keys
diff --git a/sysdeps/tile/tilegx/memchr.c b/sysdeps/tile/tilegx/memchr.c
index 7da0f79..38c0da6 100644
--- a/sysdeps/tile/tilegx/memchr.c
+++ b/sysdeps/tile/tilegx/memchr.c
@@ -58,7 +58,7 @@ __memchr (const void *s, int c, size_t n)
   /* Compute the address of the word containing the last byte. */
   last_word_ptr = (const uint64_t *) ((uintptr_t) last_byte_ptr & -8);
 
-  while ((bits = __insn_v1cmpeq (v, goal)) == 0)
+  while ((bits = v1cmpeq (v, goal)) == 0)
     {
       if (__builtin_expect (p == last_word_ptr, 0))
         {
diff --git a/sysdeps/tile/tilegx/rawmemchr.c b/sysdeps/tile/tilegx/rawmemchr.c
index 54b4a5c..3f5044c 100644
--- a/sysdeps/tile/tilegx/rawmemchr.c
+++ b/sysdeps/tile/tilegx/rawmemchr.c
@@ -36,7 +36,7 @@ __rawmemchr (const void *s, int c)
   uint64_t v = (*p | before_mask) ^ (goal & before_mask);
 
   uint64_t bits;
-  while ((bits = __insn_v1cmpeq (v, goal)) == 0)
+  while ((bits = v1cmpeq (v, goal)) == 0)
     v = *++p;
 
   return ((char *) p) + (CFZ (bits) >> 3);
diff --git a/sysdeps/tile/tilegx/strchr.c b/sysdeps/tile/tilegx/strchr.c
index 36dfd31..1a5eb5c 100644
--- a/sysdeps/tile/tilegx/strchr.c
+++ b/sysdeps/tile/tilegx/strchr.c
@@ -38,16 +38,16 @@ strchr (const char *s, int c)
      match neither zero nor goal (we make sure the high bit of each byte
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
-  uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+  uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'. */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte. */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
         break;
diff --git a/sysdeps/tile/tilegx/strchrnul.c b/sysdeps/tile/tilegx/strchrnul.c
index e0f13b6..e3024dd 100644
--- a/sysdeps/tile/tilegx/strchrnul.c
+++ b/sysdeps/tile/tilegx/strchrnul.c
@@ -36,16 +36,16 @@ __strchrnul (const char *s, int c)
      match neither zero nor goal (we make sure the high bit of each byte
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
-  uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+  uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'. */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte. */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
         break;
diff --git a/sysdeps/tile/tilegx/string-endian.h b/sysdeps/tile/tilegx/string-endian.h
index fe9b073..6a3f882 100644
--- a/sysdeps/tile/tilegx/string-endian.h
+++ b/sysdeps/tile/tilegx/string-endian.h
@@ -56,3 +56,28 @@ static inline uint64_t copy_byte(uint8_t byte)
 {
   return __insn_shufflebytes(byte, 0, 0);
 }
+
+/* Implement the byte vector instructions using extended assembly.
+   The __insn_OP() builtins are buggy in current compiler versions.  */
+
+#define VECOP(OP)                                                       \
+  static inline uint64_t OP (uint64_t a, uint64_t b)                    \
+  {                                                                     \
+    uint64_t result;                                                    \
+    asm volatile (#OP " %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));   \
+    return result;                                                      \
+  }                                                                     \
+                                                                        \
+  static inline uint64_t OP ## i (uint64_t a, uint64_t b)               \
+  {                                                                     \
+    uint64_t result;                                                    \
+    asm volatile (#OP "i %0, %1, %2" : "=r"(result) : "r"(a), "I"(b));  \
+    return result;                                                      \
+  }
+
+VECOP(v1cmpeq)
+VECOP(v1cmpltu)
+VECOP(v1cmpne)
+VECOP(v1add)
+VECOP(v1shru)
+VECOP(v1shl)
diff --git a/sysdeps/tile/tilegx/strlen.c b/sysdeps/tile/tilegx/strlen.c
index 5cd04ac..cebdf22 100644
--- a/sysdeps/tile/tilegx/strlen.c
+++ b/sysdeps/tile/tilegx/strlen.c
@@ -31,7 +31,7 @@ strlen (const char *s)
   uint64_t v = *p | MASK (s_int);
 
   uint64_t bits;
-  while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+  while ((bits = v1cmpeqi (v, 0)) == 0)
     v = *++p;
 
   return ((const char *) p) + (CFZ (bits) >> 3) - s;
diff --git a/sysdeps/tile/tilegx/strnlen.c b/sysdeps/tile/tilegx/strnlen.c
index 5d73a14..c3560d2 100644
--- a/sysdeps/tile/tilegx/strnlen.c
+++ b/sysdeps/tile/tilegx/strnlen.c
@@ -37,7 +37,7 @@ __strnlen (const char *s, size_t maxlen)
   uint64_t v = *p | MASK (s_int);
 
   uint64_t bits;
-  while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+  while ((bits = v1cmpeqi (v, 0)) == 0)
     {
       if (bytes_read >= maxlen)
 	{
diff --git a/sysdeps/tile/tilegx/strrchr.c b/sysdeps/tile/tilegx/strrchr.c
index 5a9049e..51a08b7 100644
--- a/sysdeps/tile/tilegx/strrchr.c
+++ b/sysdeps/tile/tilegx/strrchr.c
@@ -34,16 +34,16 @@ strrchr (const char *s, int c)
      match neither zero nor goal (we make sure the high bit of each byte
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
-  uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+  uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
   const char *found = NULL;
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'. */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte. */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       /* If we found the goal, record the last offset. */
       if (__builtin_expect (goal_matches != 0, 0))
diff --git a/sysdeps/tile/tilegx/strstr.c b/sysdeps/tile/tilegx/strstr.c
index 548a920..f82936a 100644
--- a/sysdeps/tile/tilegx/strstr.c
+++ b/sysdeps/tile/tilegx/strstr.c
@@ -57,10 +57,10 @@ static uint64_t
 vec_tolower (uint64_t cc)
 {
   /* For Uppercases letters, add 32 to convert to lower case.  */
-  uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1);
-  uint64_t less_than_A =  __insn_v1cmpltui (cc, 'A');
-  uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A);
-  return __insn_v1add (cc,__insn_v1shli (is_upper, 5));
+  uint64_t less_than_eq_Z = v1cmpltui (cc, 'Z' + 1);
+  uint64_t less_than_A =  v1cmpltui (cc, 'A');
+  uint64_t is_upper = v1cmpne (less_than_eq_Z, less_than_A);
+  return v1add (cc, v1shli (is_upper, 5));
 }
 
 /* There is no strcasechr() defined, but needed for 1 byte case
@@ -85,16 +85,16 @@ strcasechr (const char *s, int c)
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
   uint64_t v =
-    (vec_tolower (*p) | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+    (vec_tolower (*p) | before_mask) ^ (goal & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'.  */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte.  */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
         break;
@@ -146,14 +146,14 @@ STRSTR2 (const char *haystack_start, const char *needle)
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
   uint64_t v =
-    (vec_load (p) | before_mask) ^ (byte1 & __insn_v1shrui (before_mask, 1));
+    (vec_load (p) | before_mask) ^ (byte1 & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'.  */
-      zero_matches = __insn_v1cmpeqi (v, 0);
-      uint64_t byte1_matches = __insn_v1cmpeq (v, byte1);
+      zero_matches = v1cmpeqi (v, 0);
+      uint64_t byte1_matches = v1cmpeq (v, byte1);
       if (__builtin_expect (zero_matches != 0, 0))
 	{
 	  /* This is the last vector.  Don't worry about matches
@@ -161,7 +161,7 @@ STRSTR2 (const char *haystack_start, const char *needle)
 	     back 1 byte to align it with the first byte, then and to
 	     check for both matching.  Each vector has a 1 in the LSB
 	     of the byte if there was match.  */
-	  uint64_t byte2_matches = __insn_v1cmpeq (v, byte2);
+	  uint64_t byte2_matches = v1cmpeq (v, byte2);
 	  goal_matches = byte1_matches & STRSHIFT (byte2_matches, 8);
 	  break;
 	}
@@ -175,7 +175,7 @@ STRSTR2 (const char *haystack_start, const char *needle)
 	    {
 	      /* 8-bytes starting 1 byte into v.  */
 	      v = __insn_dblalign (v, v2, (void*)1);
-	      uint64_t byte2_matches_shifted = __insn_v1cmpeq (v, byte2);
+	      uint64_t byte2_matches_shifted = v1cmpeq (v, byte2);
 	      goal_matches = byte1_matches & byte2_matches_shifted;
 	      if (__builtin_expect (goal_matches != 0, 0))
 		break;

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                           |   14 ++++++++++++++
 sysdeps/tile/tilegx/memchr.c        |    2 +-
 sysdeps/tile/tilegx/rawmemchr.c     |    2 +-
 sysdeps/tile/tilegx/strchr.c        |    6 +++---
 sysdeps/tile/tilegx/strchrnul.c     |    6 +++---
 sysdeps/tile/tilegx/string-endian.h |   25 +++++++++++++++++++++++++
 sysdeps/tile/tilegx/strlen.c        |    2 +-
 sysdeps/tile/tilegx/strnlen.c       |    2 +-
 sysdeps/tile/tilegx/strrchr.c       |    6 +++---
 sysdeps/tile/tilegx/strstr.c        |   24 ++++++++++++------------
 10 files changed, 64 insertions(+), 25 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]