This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.



GNU C Library master sources branch master updated. glibc-2.20-64-g845a734


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  845a73434c829341db4d250f837754afb17ae1a2 (commit)
       via  83d641efd1f6eb699dd846b9e99af7bcff7e063e (commit)
       via  d9cd52e64d7b6b0fd56566de87c826cb6fe3677d (commit)
       via  4b68ea1162f39f62590e49eb5d4371871e071f32 (commit)
       via  c86f7b80f43d7336eab1119dae78b0f10b7244ec (commit)
       via  1c4c1a6f4d0e8ffab24419d136fbfe698a201d24 (commit)
      from  99d86ea324820ec7b7755377182922a6233e25fb (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=845a73434c829341db4d250f837754afb17ae1a2

commit 845a73434c829341db4d250f837754afb17ae1a2
Author: Chris Metcalf <cmetcalf@tilera.com>
Date:   Wed Oct 1 15:10:04 2014 -0400

    tile: add clock_gettime support via vDSO
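
The pattern added here is the usual vDSO fast path: call the kernel-exported
function if the dynamic linker resolved it, and fall back to the real system
call when it is absent or returns ENOSYS.  A minimal standalone sketch of that
pattern follows; it is not part of the patch, and the vdso_clock_gettime
pointer and example_clock_gettime name are hypothetical stand-ins for the
__vdso_clock_gettime global and the INLINE_VSYSCALL macro introduced below.

    #include <errno.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    /* Hypothetical global, resolved from the vDSO at startup (cf. init-first.c).  */
    static long int (*vdso_clock_gettime) (clockid_t, struct timespec *);

    int
    example_clock_gettime (clockid_t clk, struct timespec *ts)
    {
      if (vdso_clock_gettime != NULL)
        {
          long int ret = vdso_clock_gettime (clk, ts);
          if (ret != -ENOSYS)               /* The vDSO handled the request.  */
            {
              if (ret < 0)
                {
                  errno = -ret;
                  return -1;
                }
              return 0;
            }
        }
      /* No vDSO entry, or it reported ENOSYS: use the real system call.  */
      return syscall (SYS_clock_gettime, clk, ts);
    }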

diff --git a/ChangeLog b/ChangeLog
index 0ab15a2..e127a08 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
 2014-10-02  Chris Metcalf  <cmetcalf@tilera.com>
 
+	* sysdeps/unix/sysv/linux/tile/sysdep.h (INLINE_VSYSCALL): Define
+	INLINE_VSYSCALL, INTERNAL_VSYSCALL, and
+	HAVE_CLOCK_GETTIME_VSYSCALL macros.
+	* sysdeps/unix/sysv/linux/tile/gettimeofday.c (__gettimeofday):
+	Use INLINE_VSYSCALL macro.
+	* sysdeps/unix/sysv/linux/tile/bits/libc-vdso.h: Add declaration of
+	__vdso_clock_gettime.
+	* sysdeps/unix/sysv/linux/tile/init-first.c
+	(_libc_vdso_platform_setup): Set new __vdso_clock_gettime global.
+	* sysdeps/unix/sysv/linux/tile/Versions (GLIBC_PRIVATE): Add
+	__vdso_clock_gettime.
+
 	* sysdeps/unix/sysv/linux/tile/clone.S (__clone): Fix code
 	to set up frame more cleanly.
 
diff --git a/sysdeps/unix/sysv/linux/tile/Versions b/sysdeps/unix/sysv/linux/tile/Versions
index 9b40d28..13da68f 100644
--- a/sysdeps/unix/sysv/linux/tile/Versions
+++ b/sysdeps/unix/sysv/linux/tile/Versions
@@ -13,5 +13,6 @@ libc {
   }
   GLIBC_PRIVATE {
     __syscall_error;
+    __vdso_clock_gettime;
   }
 }
diff --git a/sysdeps/unix/sysv/linux/tile/bits/libc-vdso.h b/sysdeps/unix/sysv/linux/tile/bits/libc-vdso.h
index c4aec16..f5b04ba 100644
--- a/sysdeps/unix/sysv/linux/tile/bits/libc-vdso.h
+++ b/sysdeps/unix/sysv/linux/tile/bits/libc-vdso.h
@@ -25,6 +25,8 @@
 extern long int (*__vdso_gettimeofday) (struct timeval *, void *)
   attribute_hidden;
 
+extern long int (*__vdso_clock_gettime) (clockid_t, struct timespec *);
+
 #endif
 
 #endif /* _LIBC_VDSO_H */
diff --git a/sysdeps/unix/sysv/linux/tile/gettimeofday.c b/sysdeps/unix/sysv/linux/tile/gettimeofday.c
index 6f62ab9..2168c2a 100644
--- a/sysdeps/unix/sysv/linux/tile/gettimeofday.c
+++ b/sysdeps/unix/sysv/linux/tile/gettimeofday.c
@@ -24,12 +24,7 @@
 int
 __gettimeofday (struct timeval *tv, struct timezone *tz)
 {
-#ifdef SHARED
-  /* If the vDSO is available we use it. */
-  if (__vdso_gettimeofday != NULL)
-    return __vdso_gettimeofday (tv, tz);
-#endif
-  return INLINE_SYSCALL (gettimeofday, 2, tv, tz);
+  return INLINE_VSYSCALL (gettimeofday, 2, tv, tz);
 }
 
 libc_hidden_def (__gettimeofday)
diff --git a/sysdeps/unix/sysv/linux/tile/init-first.c b/sysdeps/unix/sysv/linux/tile/init-first.c
index 9790d22..fa39b94 100644
--- a/sysdeps/unix/sysv/linux/tile/init-first.c
+++ b/sysdeps/unix/sysv/linux/tile/init-first.c
@@ -21,11 +21,17 @@
 
 long int (*__vdso_gettimeofday) (struct timeval *, void *) attribute_hidden;
 
+long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
+  __attribute__ ((nocommon));
+strong_alias (__vdso_clock_gettime, __GI___vdso_clock_gettime attribute_hidden)
+
+
 static inline void
 _libc_vdso_platform_setup (void)
 {
   PREPARE_VERSION (linux26, "LINUX_2.6", 61765110);
   __vdso_gettimeofday = _dl_vdso_vsym ("__vdso_gettimeofday", &linux26);
+  __vdso_clock_gettime = _dl_vdso_vsym ("__vdso_clock_gettime", &linux26);
 }
 
 #define VDSO_SETUP _libc_vdso_platform_setup
diff --git a/sysdeps/unix/sysv/linux/tile/sysdep.h b/sysdeps/unix/sysv/linux/tile/sysdep.h
index a09f8a4..238b266 100644
--- a/sysdeps/unix/sysv/linux/tile/sysdep.h
+++ b/sysdeps/unix/sysv/linux/tile/sysdep.h
@@ -202,6 +202,65 @@
   "=R02" (_clobber_r2), "=R03" (_clobber_r3), "=R04" (_clobber_r4),     \
     "=R05" (_clobber_r5), "=R10" (_clobber_r10)
 
+/* This version is for kernels that implement system calls that
+   behave like function calls as far as register saving is concerned.
+   It falls back to the real syscall if the vDSO entry point doesn't
+   exist or fails with ENOSYS.  */
+# ifdef SHARED
+#  define INLINE_VSYSCALL(name, nr, args...) \
+  ({									      \
+    __label__ out;							      \
+    __label__ iserr;							      \
+    INTERNAL_SYSCALL_DECL (sc_err);					      \
+    long int sc_ret;							      \
+									      \
+    __typeof (__vdso_##name) vdsop = __vdso_##name;			      \
+    if (vdsop != NULL)							      \
+      {									      \
+        sc_ret = vdsop (args);						      \
+        if (!INTERNAL_SYSCALL_ERROR_P (sc_ret, sc_err))			      \
+          goto out;							      \
+        if (INTERNAL_SYSCALL_ERRNO (sc_ret, sc_err) != ENOSYS)		      \
+          goto iserr;							      \
+      }									      \
+									      \
+    sc_ret = INTERNAL_SYSCALL (name, sc_err, nr, ##args);		      \
+    if (INTERNAL_SYSCALL_ERROR_P (sc_ret, sc_err))			      \
+      {									      \
+      iserr:								      \
+        __set_errno (INTERNAL_SYSCALL_ERRNO (sc_ret, sc_err));		      \
+        sc_ret = -1L;							      \
+      }									      \
+  out:									      \
+    sc_ret;								      \
+  })
+#  define INTERNAL_VSYSCALL(name, err, nr, args...) \
+  ({									      \
+    __label__ out;							      \
+    long int v_ret;							      \
+									      \
+    __typeof (__vdso_##name) vdsop = __vdso_##name;			      \
+    if (vdsop != NULL)							      \
+      {									      \
+        v_ret = vdsop (args);						      \
+        if (!INTERNAL_SYSCALL_ERROR_P (v_ret, err)			      \
+            || INTERNAL_SYSCALL_ERRNO (v_ret, err) != ENOSYS)		      \
+          goto out;							      \
+      }									      \
+    v_ret = INTERNAL_SYSCALL (name, err, nr, ##args);			      \
+  out:									      \
+    v_ret;								      \
+  })
+
+/* List of system calls which are supported as vsyscalls.  */
+#  define HAVE_CLOCK_GETTIME_VSYSCALL	1
+
+# else
+#  define INLINE_VSYSCALL(name, nr, args...) \
+  INLINE_SYSCALL (name, nr, ##args)
+#  define INTERNAL_VSYSCALL(name, err, nr, args...) \
+  INTERNAL_SYSCALL (name, err, nr, ##args)
+# endif
 #endif /* not __ASSEMBLER__ */
 
 /* Pointer mangling support.  */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=83d641efd1f6eb699dd846b9e99af7bcff7e063e

commit 83d641efd1f6eb699dd846b9e99af7bcff7e063e
Author: Chris Metcalf <cmetcalf@tilera.com>
Date:   Thu Sep 25 16:53:03 2014 -0400

    tile: make the prolog of clone() more conformant
    
    With this change we properly set up the frame first and tear it
    down last, doing argument checking only once the frame is set up.

diff --git a/ChangeLog b/ChangeLog
index 7dd6029..0ab15a2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2014-10-02  Chris Metcalf  <cmetcalf@tilera.com>
 
+	* sysdeps/unix/sysv/linux/tile/clone.S (__clone): Fix code
+	to set up frame more cleanly.
+
 	* sysdeps/tile/memcmp.c: New file.
 
 	* sysdeps/unix/sysv/linux/tile/sysconf.c: New file.
diff --git a/sysdeps/unix/sysv/linux/tile/clone.S b/sysdeps/unix/sysv/linux/tile/clone.S
index f48dba5..0e109f8 100644
--- a/sysdeps/unix/sysv/linux/tile/clone.S
+++ b/sysdeps/unix/sysv/linux/tile/clone.S
@@ -41,10 +41,6 @@
 
 	.text
 ENTRY (__clone)
-	/* sanity check arguments */
-	BEQZ r0, .Linvalid
-	BEQZ r1, .Linvalid
-
 	/* Create a stack frame so we can pass callee-saves to new task. */
 	{
 	 move r10, sp
@@ -71,6 +67,10 @@ ENTRY (__clone)
 	ST r11, r32
 	cfi_offset (r32, FRAME_R32 - FRAME_SIZE)
 
+	/* sanity check arguments */
+	BEQZ r0, .Linvalid
+	BEQZ r1, .Linvalid
+
 	/* Make sure child stack is properly aligned, and set up the
 	   top frame so that we can call out of it immediately in the
 	   child.  Setting it up here means we fault in the parent if
@@ -120,6 +120,7 @@ ENTRY (__clone)
 	swint1
 	BEQZ r0, .Lthread_start  /* If in child task.  */
 
+.Ldone:
 	/* Restore the callee-saved registers and return. */
 	ADDLI_PTR lr, sp, FRAME_SIZE
 	{
@@ -149,7 +150,7 @@ ENTRY (__clone)
 .Linvalid:
 	{
 	 movei r1, EINVAL
-	 j SYSCALL_ERROR_NAME
+	 j .Ldone
 	}
 
 /* This function expects to receive:

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d9cd52e64d7b6b0fd56566de87c826cb6fe3677d

commit d9cd52e64d7b6b0fd56566de87c826cb6fe3677d
Author: Chris Metcalf <cmetcalf@tilera.com>
Date:   Thu Sep 25 16:49:38 2014 -0400

    tile: optimize memcmp
    
    Customize memcmp.c for tile, using tricks similar to those in memcpy:
    
    - replace MERGE macro with dblalign.
    - replace memcmp_bytes function with revbytes.
    - use __glibc_likely.
    - use post-increment addressing.
    
    The schedule is still not perfect: the compiler does not hoist
    code above the comparison branch, which would save a bundle or two.
    memcmp speeds up by 30-40% on shorter aligned tests in benchtest,
    while some tests with unaligned lengths take a small performance hit.
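
The ordering trick behind the revbytes change can be shown portably: once two
words are known to differ, byte-reversing them on a little-endian machine lets
a single unsigned comparison produce memcmp()'s result.  The helper below is
an illustrative sketch, not code from the patch, with __builtin_bswap64
standing in for the tile __insn_revbytes intrinsic:

    #include <stdint.h>

    /* memcmp()-style ordering for two 64-bit words known to differ.  After
       the byte reversal, the earliest differing byte in memory order sits
       in the most significant position and so decides the comparison.  */
    static int
    cmp_lt_or_gt (uint64_t a, uint64_t b)
    {
      return __builtin_bswap64 (a) > __builtin_bswap64 (b) ? 1 : -1;
    }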

diff --git a/ChangeLog b/ChangeLog
index 44a45e2..7dd6029 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
 2014-10-02  Chris Metcalf  <cmetcalf@tilera.com>
 
+	* sysdeps/tile/memcmp.c: New file.
+
 	* sysdeps/unix/sysv/linux/tile/sysconf.c: New file.
 
 	* sysdeps/tile/tilegx/string-endian.h (STRSHIFT): New macro.
diff --git a/sysdeps/tile/memcmp.c b/sysdeps/tile/memcmp.c
new file mode 100644
index 0000000..8d39921
--- /dev/null
+++ b/sysdeps/tile/memcmp.c
@@ -0,0 +1,367 @@
+/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Torbjorn Granlund (tege@sics.se).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#undef	__ptr_t
+#define __ptr_t	void *
+
+#if defined HAVE_STRING_H || defined _LIBC
+# include <string.h>
+#endif
+
+#undef memcmp
+
+#ifndef MEMCMP
+# define MEMCMP memcmp
+#endif
+
+#ifdef _LIBC
+
+# include <memcopy.h>
+# include <endian.h>
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+#  define WORDS_BIGENDIAN
+# endif
+
+#else	/* Not in the GNU C library.  */
+
+# include <sys/types.h>
+
+/* Type to use for aligned memory operations.
+   This should normally be the biggest type supported by a single load
+   and store.  Must be an unsigned type.  */
+# define op_t	unsigned long int
+# define OPSIZ	(sizeof(op_t))
+
+/* Threshold value for when to enter the unrolled loops.  */
+# define OP_T_THRES	16
+
+/* Type to use for unaligned operations.  */
+typedef unsigned char byte;
+
+#endif	/* In the GNU C library.  */
+
+/* Provide the appropriate builtins to shift two registers based on
+   the alignment of a pointer held in a third register, and to reverse
+   the bytes in a word.  */
+#ifdef __tilegx__
+#define DBLALIGN __insn_dblalign
+#define REVBYTES __insn_revbytes
+#else
+#define DBLALIGN __insn_dword_align
+#define REVBYTES __insn_bytex
+#endif
+
+#ifdef WORDS_BIGENDIAN
+# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
+#else
+# define CMP_LT_OR_GT(a, b) (REVBYTES(a) > REVBYTES(b) ? 1 : -1)
+#endif
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE!  */
+
+/* The strategy of this memcmp is:
+
+   1. Compare bytes until one of the block pointers is aligned.
+
+   2. Compare using memcmp_common_alignment or
+      memcmp_not_common_alignment, regarding the alignment of the other
+      block after the initial byte operations.  The maximum number of
+      full words (of type op_t) are compared in this way.
+
+   3. Compare the few remaining bytes.  */
+
+static int memcmp_common_alignment (long, long, size_t) __THROW;
+
+/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
+   objects (not LEN bytes!).  Both SRCP1 and SRCP2 should be aligned for
+   memory operations on `op_t's.  */
+static int
+memcmp_common_alignment (srcp1, srcp2, len)
+     long int srcp1;
+     long int srcp2;
+     size_t len;
+{
+  op_t a0, a1;
+  op_t b0, b1;
+
+  switch (len % 4)
+    {
+    default: /* Avoid warning about uninitialized local variables.  */
+    case 2:
+      a0 = ((op_t *) srcp1)[0];
+      b0 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      len += 2;
+      goto do1;
+    case 3:
+      a1 = ((op_t *) srcp1)[0];
+      b1 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      len += 1;
+      goto do2;
+    case 0:
+      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+	return 0;
+      a0 = ((op_t *) srcp1)[0];
+      b0 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      goto do3;
+    case 1:
+      a1 = ((op_t *) srcp1)[0];
+      b1 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      len -= 1;
+      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+	goto do0;
+      /* Fall through.  */
+    }
+
+  do
+    {
+      a0 = ((op_t *) srcp1)[0];
+      b0 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (a1 != b1))
+	return CMP_LT_OR_GT (a1, b1);
+
+    do3:
+      a1 = ((op_t *) srcp1)[0];
+      b1 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (a0 != b0))
+	return CMP_LT_OR_GT (a0, b0);
+
+    do2:
+      a0 = ((op_t *) srcp1)[0];
+      b0 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (a1 != b1))
+	return CMP_LT_OR_GT (a1, b1);
+
+    do1:
+      a1 = ((op_t *) srcp1)[0];
+      b1 = ((op_t *) srcp2)[0];
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (a0 != b0))
+	return CMP_LT_OR_GT (a0, b0);
+
+      len -= 4;
+    }
+  while (len != 0);
+
+  /* This is the right position for do0.  Please don't move
+     it into the loop.  */
+ do0:
+  if (__glibc_likely (a1 != b1))
+    return CMP_LT_OR_GT (a1, b1);
+  return 0;
+}
+
+static int memcmp_not_common_alignment (long, long, size_t) __THROW;
+
+/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
+   `op_t' objects (not LEN bytes!).  SRCP2 should be aligned for memory
+   operations on `op_t', but SRCP1 *should be unaligned*.  */
+static int
+memcmp_not_common_alignment (srcp1, srcp2, len)
+     long int srcp1;
+     long int srcp2;
+     size_t len;
+{
+  void * srcp1i;
+  op_t a0, a1, a2, a3;
+  op_t b0, b1, b2, b3;
+  op_t x;
+
+  /* Calculate how to shift a word read at the memory operation
+     aligned srcp1 to make it aligned for comparison.  */
+
+  srcp1i = (void *) srcp1;
+
+  /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
+     it points in the middle of.  */
+  srcp1 &= -OPSIZ;
+
+  switch (len % 4)
+    {
+    default: /* Avoid warning about uninitialized local variables.  */
+    case 2:
+      a1 = ((op_t *) srcp1)[0];
+      a2 = ((op_t *) srcp1)[1];
+      b2 = ((op_t *) srcp2)[0];
+      srcp1 += 2 * OPSIZ;
+      srcp2 += 1 * OPSIZ;
+      len += 2;
+      goto do1;
+    case 3:
+      a0 = ((op_t *) srcp1)[0];
+      a1 = ((op_t *) srcp1)[1];
+      b1 = ((op_t *) srcp2)[0];
+      srcp1 += 2 * OPSIZ;
+      srcp2 += 1 * OPSIZ;
+      len += 1;
+      goto do2;
+    case 0:
+      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+	return 0;
+      a3 = ((op_t *) srcp1)[0];
+      a0 = ((op_t *) srcp1)[1];
+      b0 = ((op_t *) srcp2)[0];
+      srcp1 += 2 * OPSIZ;
+      srcp2 += 1 * OPSIZ;
+      goto do3;
+    case 1:
+      a2 = ((op_t *) srcp1)[0];
+      a3 = ((op_t *) srcp1)[1];
+      b3 = ((op_t *) srcp2)[0];
+      srcp1 += 2 * OPSIZ;
+      srcp2 += 1 * OPSIZ;
+      len -= 1;
+      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+	goto do0;
+      /* Fall through.  */
+    }
+
+  do
+    {
+      a0 = ((op_t *) srcp1)[0];
+      b0 = ((op_t *) srcp2)[0];
+      x = DBLALIGN (a2, a3, srcp1i);
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (x != b3))
+	return CMP_LT_OR_GT (x, b3);
+
+    do3:
+      a1 = ((op_t *) srcp1)[0];
+      b1 = ((op_t *) srcp2)[0];
+      x = DBLALIGN (a3, a0, srcp1i);
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (x != b0))
+	return CMP_LT_OR_GT (x, b0);
+
+    do2:
+      a2 = ((op_t *) srcp1)[0];
+      b2 = ((op_t *) srcp2)[0];
+      x = DBLALIGN (a0, a1, srcp1i);
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (x != b1))
+	return CMP_LT_OR_GT (x, b1);
+
+    do1:
+      a3 = ((op_t *) srcp1)[0];
+      b3 = ((op_t *) srcp2)[0];
+      x = DBLALIGN (a1, a2, srcp1i);
+      srcp1 += OPSIZ;
+      srcp2 += OPSIZ;
+      if (__glibc_likely (x != b2))
+	return CMP_LT_OR_GT (x, b2);
+
+      len -= 4;
+    }
+  while (len != 0);
+
+  /* This is the right position for do0.  Please don't move
+     it into the loop.  */
+ do0:
+  x = DBLALIGN (a2, a3, srcp1i);
+  if (__glibc_likely (x != b3))
+    return CMP_LT_OR_GT (x, b3);
+  return 0;
+}
+
+int
+MEMCMP (s1, s2, len)
+     const __ptr_t s1;
+     const __ptr_t s2;
+     size_t len;
+{
+  op_t a0;
+  op_t b0;
+  long int srcp1 = (long int) s1;
+  long int srcp2 = (long int) s2;
+  int res;
+
+  if (len >= OP_T_THRES)
+    {
+      /* There are at least some bytes to compare.  No need to test
+	 for LEN == 0 in this alignment loop.  */
+      while (srcp2 % OPSIZ != 0)
+	{
+	  a0 = ((byte *) srcp1)[0];
+	  b0 = ((byte *) srcp2)[0];
+	  srcp1 += 1;
+	  srcp2 += 1;
+	  res = a0 - b0;
+	  if (__glibc_likely (res != 0))
+	    return res;
+	  len -= 1;
+	}
+
+      /* SRCP2 is now aligned for memory operations on `op_t'.
+	 SRCP1 alignment determines if we can do a simple,
+	 aligned compare or need to shuffle bits.  */
+
+      if (srcp1 % OPSIZ == 0)
+	res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
+      else
+	res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
+      if (res != 0)
+	return res;
+
+      /* Number of bytes remaining in the interval [0..OPSIZ-1].  */
+      srcp1 += len & -OPSIZ;
+      srcp2 += len & -OPSIZ;
+      len %= OPSIZ;
+    }
+
+  /* There are just a few bytes to compare.  Use byte memory operations.  */
+  while (len != 0)
+    {
+      a0 = ((byte *) srcp1)[0];
+      b0 = ((byte *) srcp2)[0];
+      srcp1 += 1;
+      srcp2 += 1;
+      res = a0 - b0;
+      if (__glibc_likely (res != 0))
+	return res;
+      len -= 1;
+    }
+
+  return 0;
+}
+libc_hidden_builtin_def(memcmp)
+#ifdef weak_alias
+# undef bcmp
+weak_alias (memcmp, bcmp)
+#endif

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4b68ea1162f39f62590e49eb5d4371871e071f32

commit 4b68ea1162f39f62590e49eb5d4371871e071f32
Author: Chris Metcalf <cmetcalf@tilera.com>
Date:   Thu Sep 25 16:41:55 2014 -0400

    tile: add support for _SC_LEVEL*CACHE* sysconf() queries
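
The new values are reported through the standard sysconf() interface, so cache
geometry can be queried portably.  A minimal usage sketch, not part of the
patch:

    #include <stdio.h>
    #include <unistd.h>

    int
    main (void)
    {
      /* Each query returns -1 if that cache level does not exist
         (for example, level 4 on tile).  */
      printf ("L1 dcache: %ld bytes, %ld-byte lines\n",
              sysconf (_SC_LEVEL1_DCACHE_SIZE),
              sysconf (_SC_LEVEL1_DCACHE_LINESIZE));
      printf ("L3 cache:  %ld bytes\n", sysconf (_SC_LEVEL3_CACHE_SIZE));
      return 0;
    }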

diff --git a/ChangeLog b/ChangeLog
index 2474b4d..44a45e2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
 2014-10-02  Chris Metcalf  <cmetcalf@tilera.com>
 
+	* sysdeps/unix/sysv/linux/tile/sysconf.c: New file.
+
 	* sysdeps/tile/tilegx/string-endian.h (STRSHIFT): New macro.
 	* sysdeps/tile/tilegx/strcasestr.c: New file.
 	* sysdeps/tile/tilegx/strnlen.c: New file.
diff --git a/sysdeps/unix/sysv/linux/tile/sysconf.c b/sysdeps/unix/sysv/linux/tile/sysconf.c
new file mode 100644
index 0000000..5db983c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tile/sysconf.c
@@ -0,0 +1,75 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <unistd.h>
+#include <sys/sysinfo.h>
+#include <arch/chip.h>
+
+static long int linux_sysconf (int name);
+
+/* Get the value of the system variable NAME.  */
+long int
+__sysconf (int name)
+{
+  /* Currently we support only tilepro and tilegx, which have
+     statically-known cache sizes.  */
+  switch (name)
+    {
+    /* Level 1 cache.  */
+    case _SC_LEVEL1_ICACHE_SIZE:
+      return CHIP_L1I_CACHE_SIZE();
+    case _SC_LEVEL1_ICACHE_ASSOC:
+      return CHIP_L1I_ASSOC();
+    case _SC_LEVEL1_ICACHE_LINESIZE:
+      return CHIP_L1I_LINE_SIZE();
+    case _SC_LEVEL1_DCACHE_SIZE:
+      return CHIP_L1D_CACHE_SIZE();
+    case _SC_LEVEL1_DCACHE_ASSOC:
+      return CHIP_L1D_ASSOC();
+    case _SC_LEVEL1_DCACHE_LINESIZE:
+      return CHIP_L1D_LINE_SIZE();
+
+    /* Level 2 cache.  */
+    case _SC_LEVEL2_CACHE_SIZE:
+      return CHIP_L2_CACHE_SIZE();
+    case _SC_LEVEL2_CACHE_ASSOC:
+      return CHIP_L2_ASSOC();
+    case _SC_LEVEL2_CACHE_LINESIZE:
+      return CHIP_L2_LINE_SIZE();
+
+    /* Level 3 cache is layered on level 2 cache.  */
+    case _SC_LEVEL3_CACHE_SIZE:
+      return CHIP_L2_CACHE_SIZE() * __get_nprocs();
+    case _SC_LEVEL3_CACHE_ASSOC:
+      return CHIP_L2_ASSOC();
+    case _SC_LEVEL3_CACHE_LINESIZE:
+      return CHIP_L2_LINE_SIZE();
+
+    /* No level 4 cache.  */
+    case _SC_LEVEL4_CACHE_SIZE:
+    case _SC_LEVEL4_CACHE_ASSOC:
+    case _SC_LEVEL4_CACHE_LINESIZE:
+      return -1;
+    }
+
+  return linux_sysconf (name);
+}
+
+/* Now the generic Linux version.  */
+#undef __sysconf
+#define __sysconf static linux_sysconf
+#include <sysdeps/unix/sysv/linux/sysconf.c>

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=c86f7b80f43d7336eab1119dae78b0f10b7244ec

commit c86f7b80f43d7336eab1119dae78b0f10b7244ec
Author: Chris Metcalf <cmetcalf@tilera.com>
Date:   Mon Sep 15 20:10:18 2014 -0400

    tilegx: provide optimized strnlen, strstr, and strcasestr
    
    strnlen() is based on the existing tile strlen() with length
    checking added.  It speeds up by up to 5x, and by around 35% on
    average across the benchtest corpus.  No regressions are seen.
    
    strstr() does 8-byte aligned loads and compares using a 2-byte
    filter on the first two bytes of the needle and then testing
    the remaining bytes of the needle using memcmp().  It speeds up
    by about 5x in the best case (for "found" needles) and by about 2x
    across benchtest as a whole, with slowdowns of as much as 45%
    in a few cases (including the "fail" case for the 128KB search).
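
The scan-then-verify structure can be sketched in plain C: look for positions
where the first two needle bytes match, then check the rest.  The patch does
the two-byte scan eight bytes at a time with tile SIMD compares and verifies
with memcmp() after bounding the haystack via strnlen(); the portable helper
below is only an illustration (it uses strncmp() so it never reads past the
haystack terminator) and is not code from the patch:

    #include <string.h>

    /* Two-byte filter sketch: NEEDLE must be at least two bytes long.  */
    static char *
    scan_with_two_byte_filter (const char *haystack, const char *needle,
                               size_t needle_len)
    {
      for (; haystack[0] != '\0' && haystack[1] != '\0'; haystack++)
        if (haystack[0] == needle[0] && haystack[1] == needle[1]
            && strncmp (haystack + 2, needle + 2, needle_len - 2) == 0)
          return (char *) haystack;
      return NULL;
    }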
    
    strcasestr() is based on strstr() but uses a SIMD tolower
    routine to convert 8 bytes to lower case in 5 instructions.
    It also uses a 2-byte filter and then strncasecmp() for the
    remaining bytes.  strncasecmp() is not optimized for SIMD, so
    there is further room for improvement.  However, it is still up
    to 16x faster for "found" needles, averaging 2x faster on the
    whole corpus of benchtests.  It does slow down by up to 35%
    in a few cases, similarly to strstr().
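
The 8-byte tolower conversion has the following byte-wise semantics; the loop
below is only a portable illustration of what the five tile vector
instructions compute, not the code from the patch:

    #include <stdint.h>

    /* Convert each byte of V that is an ASCII uppercase letter to lower
       case by adding 32 to that byte.  */
    static uint64_t
    vec_tolower_portable (uint64_t v)
    {
      uint64_t result = v;
      for (int i = 0; i < 8; i++)
        {
          uint8_t c = (v >> (i * 8)) & 0xff;
          if (c >= 'A' && c <= 'Z')
            result += (uint64_t) 32 << (i * 8);
        }
      return result;
    }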

diff --git a/ChangeLog b/ChangeLog
index ce5ec8f..2474b4d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2014-10-02  Chris Metcalf  <cmetcalf@tilera.com>
 
+	* sysdeps/tile/tilegx/string-endian.h (STRSHIFT): New macro.
+	* sysdeps/tile/tilegx/strcasestr.c: New file.
+	* sysdeps/tile/tilegx/strnlen.c: New file.
+	* sysdeps/tile/tilegx/strstr.c: New file.
+
 	* sysdeps/tile/tilegx/string-endian.h (copy_byte): Optimize.
 
 2014-10-06  Arjun Shankar  <arjun.is@lostca.se>
diff --git a/sysdeps/tile/tilegx/strcasestr.c b/sysdeps/tile/tilegx/strcasestr.c
new file mode 100644
index 0000000..13b0a84
--- /dev/null
+++ b/sysdeps/tile/tilegx/strcasestr.c
@@ -0,0 +1,55 @@
+/* Return the offset of one string within another.
+   Copyright (C) 1994, 1996-2000, 2004, 2008, 2009
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification.  */
+#include <string.h>
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <strings.h>
+
+#define USE_AS_STRCASESTR
+#define STRSTR __strcasestr
+#define STRSTR2 strcasestr2
+#define STRCHR strcasechr
+#define STRSTR_SCAN strcasestr_scan
+
+#undef strcasestr
+#undef __strcasestr
+
+#ifndef STRCASESTR
+#define STRCASESTR __strcasestr
+#endif
+
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+
+#define CANON_ELEMENT(c) TOLOWER (c)
+#define CMP_FUNC(p1, p2, l)				\
+  __strncasecmp ((const char *) (p1), (const char *) (p2), l)
+
+#include "strstr.c"
+
+#ifndef NO_ALIAS
+weak_alias (__strcasestr, strcasestr)
+#endif
diff --git a/sysdeps/tile/tilegx/string-endian.h b/sysdeps/tile/tilegx/string-endian.h
index 4733389..2dbc1e4 100644
--- a/sysdeps/tile/tilegx/string-endian.h
+++ b/sysdeps/tile/tilegx/string-endian.h
@@ -16,24 +16,36 @@
    License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-/* Provide a mask based on the pointer alignment that
+#include <endian.h>
+#include <stdint.h>
+
+/* Provide a set of macros to help keep endianness #ifdefs out of
+   the string functions.
+
+   MASK: Provide a mask based on the pointer alignment that
    sets up non-zero bytes before the beginning of the string.
    The MASK expression works because shift counts are taken mod 64.
-   Also, specify how to count "first" and "last" bits
-   when the bits have been read as a word.  */
 
-#include <stdint.h>
+   NULMASK: Clear bytes beyond a given point in the string.
+
+   CFZ: Find the first zero bit in the 8 string bytes in a long.
+
+   REVCZ: Find the last zero bit in the 8 string bytes in a long.
+
+   STRSHIFT: Shift N bits towards the start of the string.  */
 
-#ifndef __BIG_ENDIAN__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 #define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1)
 #define NULMASK(x) ((2ULL << x) - 1)
 #define CFZ(x) __insn_ctz(x)
 #define REVCZ(x) __insn_clz(x)
+#define STRSHIFT(x,n) ((x) >> n)
 #else
 #define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1)))
 #define NULMASK(x) (-2LL << (63 - x))
 #define CFZ(x) __insn_clz(x)
 #define REVCZ(x) __insn_ctz(x)
+#define STRSHIFT(x,n) ((x) << n)
 #endif
 
 /* Create eight copies of the byte in a uint64_t.  Byte Shuffle uses
diff --git a/sysdeps/tile/tilegx/strnlen.c b/sysdeps/tile/tilegx/strnlen.c
new file mode 100644
index 0000000..33ecc03
--- /dev/null
+++ b/sysdeps/tile/tilegx/strnlen.c
@@ -0,0 +1,58 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <string.h>
+#include <stdint.h>
+#include "string-endian.h"
+
+/* Find the length of S, but scan at most MAXLEN characters.  If no
+   '\0' terminator is found in that many characters, return MAXLEN.  */
+size_t
+__strnlen (const char *s, size_t maxlen)
+{
+  /* When maxlen is 0, don't read any bytes; doing so might cause a page fault.  */
+  if (maxlen == 0)
+    return 0;
+
+  /* Get an aligned pointer. */
+  const uintptr_t s_int = (uintptr_t) s;
+  const uint64_t *p = (const uint64_t *) (s_int & -8);
+  size_t bytes_read = sizeof (*p) - (s_int & (sizeof (*p) - 1));
+
+  /* Read and MASK the first word. */
+  uint64_t v = *p | MASK (s_int);
+
+  uint64_t bits;
+  while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+    {
+      if (bytes_read >= maxlen)
+	{
+	  /* Read maxlen bytes and didn't find the terminator. */
+	  return maxlen;
+	}
+      v = *++p;
+      bytes_read += sizeof (v);
+    }
+
+  /* Found '\0'; check that the length is not larger than maxlen.  */
+  size_t len = ((const char *) p) + (CFZ (bits) >> 3) - s;
+  return (len < maxlen ? len : maxlen);
+}
+weak_alias (__strnlen, strnlen)
+libc_hidden_def (strnlen)
diff --git a/sysdeps/tile/tilegx/strstr.c b/sysdeps/tile/tilegx/strstr.c
new file mode 100644
index 0000000..2627e75
--- /dev/null
+++ b/sysdeps/tile/tilegx/strstr.c
@@ -0,0 +1,271 @@
+/* strstr with Tile intrinsics
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Specification of strstr.  */
+#include <string.h>
+
+#include <stdbool.h>
+#include "string-endian.h"
+
+#define RETURN_TYPE char *
+#define AVAILABLE(h, h_l, j, n_l)			\
+  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
+   && ((h_l) = (j) + (n_l)))
+#include "str-two-way.h"
+
+#undef strstr
+
+#ifndef STRSTR
+#define STRSTR strstr
+#endif
+
+#ifndef STRSTR2
+#define STRSTR2 strstr2
+#endif
+
+#ifndef STRCHR
+#define STRCHR strchr
+#endif
+
+#ifndef STRSTR_SCAN
+#define STRSTR_SCAN strstr_scan
+#endif
+
+#ifndef TOLOWER
+# define TOLOWER(Ch) (Ch)
+#endif
+
+#ifdef USE_AS_STRCASESTR
+
+static uint64_t
+vec_tolower (uint64_t cc)
+{
+  /* For uppercase letters, add 32 to convert to lower case.  */
+  uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1);
+  uint64_t less_than_A =  __insn_v1cmpltui (cc, 'A');
+  uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A);
+  return __insn_v1add (cc,__insn_v1shli (is_upper, 5));
+}
+
+/* There is no strcasechr() defined, but one is needed for the 1-byte
+   case of strcasestr(), so create it here.  */
+
+static char *
+strcasechr (const char *s, int c)
+{
+  int z, g;
+
+  c = tolower (c);
+
+  /* Get an aligned pointer.  */
+  const uintptr_t s_int = (uintptr_t) s;
+  const uint64_t *p = (const uint64_t *) (s_int & -8);
+
+  /* Create eight copies of the byte for which we are looking.  */
+  const uint64_t goal = copy_byte(c);
+
+  /* Read the first aligned word, but force bytes before the string to
+     match neither zero nor goal (we make sure the high bit of each byte
+     is 1, and the low 7 bits are all the opposite of the goal byte).  */
+  const uint64_t before_mask = MASK (s_int);
+  uint64_t v =
+    (vec_tolower (*p) | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+
+  uint64_t zero_matches, goal_matches;
+  while (1)
+    {
+      /* Look for a terminating '\0'.  */
+      zero_matches = __insn_v1cmpeqi (v, 0);
+
+      /* Look for the goal byte.  */
+      goal_matches = __insn_v1cmpeq (v, goal);
+
+      if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
+        break;
+
+      v = vec_tolower (*++p);
+    }
+
+  z = CFZ (zero_matches);
+  g = CFZ (goal_matches);
+
+  /* If we found c before '\0' we got a match. Note that if c == '\0'
+     then g == z, and we correctly return the address of the '\0'
+     rather than NULL.  */
+  return (g <= z) ? ((char *) p) + (g >> 3) : NULL;
+}
+
+# define vec_load(p) vec_tolower (*(p))
+# define STRCHR strcasechr
+# define CMP_FUNC __strncasecmp
+
+#else
+
+# define vec_load(p) (*(p))
+# define STRCHR strchr
+# define CMP_FUNC memcmp
+
+#endif
+
+
+/* Compare 2-character needle using SIMD.  */
+static char *
+STRSTR2 (const char *haystack_start, const char *needle)
+{
+  int z, g;
+
+  __insn_prefetch (haystack_start + 64);
+
+  /* Get an aligned pointer.  */
+  const uintptr_t s_int = (uintptr_t) haystack_start;
+  const uint64_t *p = (const uint64_t *) (s_int & -8);
+
+  /* Create eight copies of the first byte for which we are looking.  */
+  const uint64_t byte1 = copy_byte (TOLOWER (*needle));
+  /* Create eight copies of the second byte for which we are looking.  */
+  const uint64_t byte2 = copy_byte (TOLOWER (*(needle + 1)));
+
+  /* Read the first aligned word, but force bytes before the string to
+     match neither zero nor goal (we make sure the high bit of each byte
+     is 1, and the low 7 bits are all the opposite of the goal byte).  */
+  const uint64_t before_mask = MASK (s_int);
+  uint64_t v =
+    (vec_load (p) | before_mask) ^ (byte1 & __insn_v1shrui (before_mask, 1));
+
+  uint64_t zero_matches, goal_matches;
+  while (1)
+    {
+      /* Look for a terminating '\0'.  */
+      zero_matches = __insn_v1cmpeqi (v, 0);
+      uint64_t byte1_matches = __insn_v1cmpeq (v, byte1);
+      if (__builtin_expect (zero_matches, 0))
+	{
+	  /* This is the last vector.  Don't worry about matches
+	     crossing into the next vector.  Shift the second byte
+	     back 1 byte to align it with the first byte, then and to
+	     check for both matching.  Each vector has a 1 in the LSB
+	     of the byte if there was match.  */
+	  uint64_t byte2_matches = __insn_v1cmpeq (v, byte2);
+	  goal_matches = byte1_matches & STRSHIFT (byte2_matches, 8);
+	  break;
+	}
+      else
+	{
+	  /* This is not the last vector, so load the next vector now.
+	     And compare byte2 to the 8-bytes starting 1 byte shifted from v,
+	     which goes 1-byte into the next vector.  */
+	  uint64_t v2 = vec_load (p + 1);
+	  if (byte1_matches)
+	    {
+	      /* 8-bytes starting 1 byte into v.  */
+	      v = __insn_dblalign (v, v2, (void*)1);
+	      uint64_t byte2_matches_shifted = __insn_v1cmpeq (v, byte2);
+	      goal_matches = byte1_matches & byte2_matches_shifted;
+	      if (__builtin_expect (goal_matches != 0, 0))
+		break;
+	    }
+	  __insn_prefetch (p + 4);
+	  /* Move to next vector.  */
+	  v = v2;
+	  p++;
+	}
+    }
+
+  z = CFZ (zero_matches);
+  g = CFZ (goal_matches);
+
+  /* If we found the match before '\0' we got a true match. Note that
+     if c == '\0' then g == z, and we correctly return the address of
+     the '\0' rather than NULL.  */
+  return (g <= z) ? ((char *) p) + (g >> 3) : NULL;
+}
+
+/* Scan for NEEDLE, using the first two characters as a filter.  */
+static char *
+STRSTR_SCAN (const char *haystack, const char *needle,
+	     unsigned int needle_len)
+{
+  char *match;
+  while (1)
+    {
+      match = STRSTR2 (haystack, needle);
+      if (match == NULL)
+	return NULL;
+      /* Found first two characters of needle, check for remainder.  */
+      if (CMP_FUNC (match + 2, needle + 2, needle_len - 2) == 0)
+	return match;
+      /* Move past the previous match. Could be +2 instead of +1 if
+         first two characters are different, but that tested slower.  */
+      haystack = match + 1;
+    }
+}
+
+/* Return the first occurrence of NEEDLE in HAYSTACK.  Return HAYSTACK
+   if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
+   HAYSTACK.  */
+char *
+STRSTR (const char *haystack_start, const char *needle_start)
+{
+  const char *haystack = haystack_start;
+  const char *needle = needle_start;
+  __insn_prefetch (haystack);
+  size_t needle_len = strlen (needle_start); /* Length of NEEDLE.  */
+  size_t haystack_len; /* Known minimum length of HAYSTACK.  */
+
+  if (needle_len <= 2)
+    {
+      if (needle_len == 1)
+	return STRCHR (haystack_start, *needle_start);
+      if (needle_len == 0)
+	return (char *) haystack_start;
+      else
+	return STRSTR2 (haystack_start, needle_start);
+    }
+
+  /* Fail if NEEDLE is longer than HAYSTACK.  */
+  if (strnlen (haystack, needle_len) < needle_len)
+    return NULL;
+
+  /* Perform the search.  Abstract memory is considered to be an array
+     of 'unsigned char' values, not an array of 'char' values.  See
+     ISO C 99 section 6.2.6.1.  */
+  if (needle_len < 40)
+    return STRSTR_SCAN (haystack_start, needle_start, needle_len);
+  else
+    {
+      /* Reduce the size of haystack using STRSTR2, since it has a smaller
+	 linear coefficient than the Two-Way algorithm.  */
+      haystack = STRSTR2 (haystack_start, needle_start);
+      if (haystack == NULL)
+	return NULL;
+      needle = needle_start;
+      haystack_len = (haystack > haystack_start + needle_len ? 1
+		      : needle_len + haystack_start - haystack);
+
+      return two_way_long_needle ((const unsigned char *) haystack,
+				  haystack_len,
+				  (const unsigned char *) needle, needle_len);
+    }
+}
+#ifndef USE_AS_STRCASESTR
+libc_hidden_builtin_def (STRSTR)
+#endif
+
+#undef LONG_NEEDLE_THRESHOLD

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=1c4c1a6f4d0e8ffab24419d136fbfe698a201d24

commit 1c4c1a6f4d0e8ffab24419d136fbfe698a201d24
Author: Chris Metcalf <cmetcalf@tilera.com>
Date:   Mon Sep 15 20:02:50 2014 -0400

    tilegx: optimize string copy_byte() internal function
    
    We can use one "shufflebytes" instruction instead of 3 "bfins"
    instructions to optimize the string functions.
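
For reference, the same eight-way byte broadcast is commonly written portably
as a multiply by the replication constant; the helper below is that portable
equivalent, not the tile code, which now uses a single shufflebytes
instruction:

    #include <stdint.h>

    /* Replicate BYTE into all eight bytes of a 64-bit word.  */
    static inline uint64_t
    copy_byte_portable (uint8_t byte)
    {
      return (uint64_t) byte * 0x0101010101010101ULL;
    }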

diff --git a/ChangeLog b/ChangeLog
index ad73cf4..ce5ec8f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2014-10-02  Chris Metcalf  <cmetcalf@tilera.com>
+
+	* sysdeps/tile/tilegx/string-endian.h (copy_byte): Optimize.
+
 2014-10-06  Arjun Shankar  <arjun.is@lostca.se>
 
 	* nptl/tst-setuid3.c: Write errors to stdout.
diff --git a/sysdeps/tile/tilegx/string-endian.h b/sysdeps/tile/tilegx/string-endian.h
index 0c4d517..4733389 100644
--- a/sysdeps/tile/tilegx/string-endian.h
+++ b/sysdeps/tile/tilegx/string-endian.h
@@ -36,12 +36,11 @@
 #define REVCZ(x) __insn_ctz(x)
 #endif
 
-/* Create eight copies of the byte in a uint64_t. */
+/* Create eight copies of the byte in a uint64_t.  Byte Shuffle uses
+   the bytes of srcB as the index into the dest vector to select a
+   byte.  With all indices of zero, the first byte is copied into all
+   the other bytes.  */
 static inline uint64_t copy_byte(uint8_t byte)
 {
-  uint64_t word = byte;
-  word = __insn_bfins(word, word, 8, 15);
-  word = __insn_bfins(word, word, 16, 31);
-  word = __insn_bfins(word, word, 32, 63);
-  return word;
+  return __insn_shufflebytes(byte, 0, 0);
 }

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                     |   28 ++
 sysdeps/tile/memcmp.c                         |  367 +++++++++++++++++++++++++
 sysdeps/tile/tilegx/strcasestr.c              |   55 ++++
 sysdeps/tile/tilegx/string-endian.h           |   33 ++-
 sysdeps/tile/tilegx/strnlen.c                 |   58 ++++
 sysdeps/tile/tilegx/strstr.c                  |  271 ++++++++++++++++++
 sysdeps/unix/sysv/linux/tile/Versions         |    1 +
 sysdeps/unix/sysv/linux/tile/bits/libc-vdso.h |    2 +
 sysdeps/unix/sysv/linux/tile/clone.S          |   11 +-
 sysdeps/unix/sysv/linux/tile/gettimeofday.c   |    7 +-
 sysdeps/unix/sysv/linux/tile/init-first.c     |    6 +
 sysdeps/unix/sysv/linux/tile/sysconf.c        |   75 +++++
 sysdeps/unix/sysv/linux/tile/sysdep.h         |   59 ++++
 13 files changed, 951 insertions(+), 22 deletions(-)
 create mode 100644 sysdeps/tile/memcmp.c
 create mode 100644 sysdeps/tile/tilegx/strcasestr.c
 create mode 100644 sysdeps/tile/tilegx/strnlen.c
 create mode 100644 sysdeps/tile/tilegx/strstr.c
 create mode 100644 sysdeps/unix/sysv/linux/tile/sysconf.c


hooks/post-receive
-- 
GNU C Library master sources

