This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
GNU C Library master sources branch azanella/generic-strings created. glibc-2.26.9000-1080-g72aa760

From: azanella at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 3 Jan 2018 15:34:54 -0000
Subject: GNU C Library master sources branch azanella/generic-strings created. glibc-2.26.9000-1080-g72aa760
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, azanella/generic-strings has been created
        at  72aa7602bb7fc7e54aaf3f1f49a18122676e138b (commit)

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=72aa7602bb7fc7e54aaf3f1f49a18122676e138b

commit 72aa7602bb7fc7e54aaf3f1f49a18122676e138b
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Tue Feb 21 17:14:16 2017 -0300

    sh: Add string-fzb.h and string-fzi.h
    
    Use the SH cmp/str insn for has_{zero,eq,zero_eq} and avoid using the
    builtin count leading/trailing zeros, which on SH calls a libgcc function
    (expanding it to direct byte testing is better than a function call).
    
    	* sysdeps/sh/string-fzb.h: New file.
    	* sysdeps/sh/string-fzi.h: Likewise.

diff --git a/sysdeps/sh/string-fzb.h b/sysdeps/sh/string-fzb.h
new file mode 100644
index 0000000..c9b7c33
--- /dev/null
+++ b/sysdeps/sh/string-fzb.h
@@ -0,0 +1,53 @@
+/* string-fzb.h -- zero byte detection; boolean.  SH4 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZB_H
+#define STRING_FZB_H 1
+
+#include <string-optype.h>
+
+/* Determine if any byte within X is zero.  This is a pure boolean test.  */
+
+static inline _Bool
+has_zero (op_t x)
+{
+  op_t zero = 0x0, ret;
+  asm volatile ("cmp/str %1,%2\n"
+		"movt %0\n"
+		: "=r" (ret)
+		: "r" (zero), "r" (x));
+  return ret;
+}
+
+/* Likewise, but for byte equality between X1 and X2.  */
+
+static inline _Bool
+has_eq (op_t x1, op_t x2)
+{
+  return has_zero (x1 ^ x2);
+}
+
+/* Likewise, but for zeros in X1 or equal bytes between X1 and X2.  */
+
+static inline _Bool
+has_zero_eq (op_t x1, op_t x2)
+{
+  return has_zero (x1) | has_eq (x1, x2);
+}
+
+#endif /* STRING_FZB_H */
diff --git a/sysdeps/sh/string-fzi.h b/sysdeps/sh/string-fzi.h
new file mode 100644
index 0000000..6aff4b3
--- /dev/null
+++ b/sysdeps/sh/string-fzi.h
@@ -0,0 +1,172 @@
+/* string-fzi.h -- zero byte detection; indexes.  SH4 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZI_H
+#define STRING_FZI_H 1
+
+#include <limits.h>
+#include <endian.h>
+
+/* The index functions below test the bytes of the word directly, one at
+   a time, instead of building a mask and counting leading or trailing
+   zeros.  */
+
+/* Given a word X that is known to contain a zero byte, return the
+   index of the first such within the word in memory order.  */
+
+static inline unsigned int
+index_first_zero (op_t x)
+{
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no clz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    { 
+      if ((x & 0x000000FF) == 0)
+        return 0;
+      else if ((x & 0x0000FF00) == 0)
+        return 1;
+      else if ((x & 0x00FF0000) == 0)
+        return 2;
+      return 3;
+    }
+  else
+    { 
+      if ((x >> 24) == 0)
+        return 0;
+      else if ((x & 0x00FF0000) == 0)
+        return 1;
+      else if ((x & 0x0000FF00) == 0)
+        return 2;
+      return 3;
+    }
+}
+
+/* Similarly, but perform the search for byte equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_eq (op_t x1, op_t x2)
+{
+  return index_first_zero (x1 ^ x2);
+}
+
+/* Similarly, but perform the search for zero within X1 or
+   equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_zero_eq (op_t x1, op_t x2)
+{
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no clz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  op_t xeq = x1 ^ x2;
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    { 
+      if (((x1 & 0x000000FF) == 0) || (xeq & 0x000000FF) == 0)
+        return 0;
+      else if (((x1 & 0x0000FF00) == 0) || (xeq & 0x0000FF00) == 0)
+        return 1;
+      else if (((x1 & 0x00FF0000) == 0) || (xeq & 0x00FF0000) == 0)
+        return 2;
+      return 3;
+    }
+  else
+    { 
+      if (((x1 >> 24) == 0) || (xeq >> 24) == 0)
+        return 0;
+      else if (((x1 & 0x00FF0000) == 0) || (xeq & 0x00FF0000) == 0)
+        return 1;
+      else if (((x1 & 0x0000FF00) == 0) || (xeq & 0x0000FF00) == 0)
+        return 2;
+      return 3;
+    }
+}
+
+/* Similarly, but perform the search for zero within X1 or
+   inequality between X1 and X2.  */
+
+static inline unsigned int
+index_first_zero_ne (op_t x1, op_t x2)
+{
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no clz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  op_t xeq = x1 ^ x2;
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    { 
+      if (((x1 & 0x000000FF) == 0) || (xeq & 0x000000FF) != 0)
+        return 0;
+      else if (((x1 & 0x0000FF00) == 0) || (xeq & 0x0000FF00) != 0)
+        return 1;
+      else if (((x1 & 0x00FF0000) == 0) || (xeq & 0x00FF0000) != 0)
+        return 2;
+      return 3;
+    }
+  else
+    { 
+      if (((x1 >> 24) == 0) || (xeq >> 24) != 0)
+        return 0;
+      else if (((x1 & 0x00FF0000) == 0) || (xeq & 0x00FF0000) != 0)
+        return 1;
+      else if (((x1 & 0x0000FF00) == 0) || (xeq & 0x0000FF00) != 0)
+        return 2;
+      return 3;
+    }
+}
+
+/* Similarly, but search for the last zero within X.  */
+
+static inline unsigned int
+index_last_zero (op_t x)
+{
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no clz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    { 
+      if ((x >> 24) == 0)
+        return 3;
+      else if ((x & 0x00FF0000) == 0)
+        return 2;
+      else if ((x & 0x0000FF00) == 0)
+        return 1;
+      return 0;
+    }
+  else
+    { 
+      if ((x & 0x000000FF) == 0)
+        return 3;
+      else if ((x & 0x0000FF00) == 0)
+        return 2;
+      else if ((x & 0x00FF0000) == 0)
+        return 1;
+      return 0;
+    }
+}
+
+static inline unsigned int
+index_last_eq (op_t x1, op_t x2)
+{
+  return index_last_zero (x1 ^ x2);
+}
+
+#endif /* STRING_FZI_H */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0ce10e49871b2d759f6115bb1355883c31bd5959

commit 0ce10e49871b2d759f6115bb1355883c31bd5959
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:26:18 2017 -0200

    powerpc: Add string-fza.h
    
    While ppc has the more important string functions in assembly,
    there are still a few generic routines used.
    
    Use the Power 6 CMPB insn for testing of zeros.
    
    	* sysdeps/powerpc/power6/string-fza.h: New file.
    	* sysdeps/powerpc/powerpc32/power6/string-fza.h: New file.
    	* sysdeps/powerpc/powerpc64/power6/string-fza.h: New file.

diff --git a/sysdeps/powerpc/power6/string-fza.h b/sysdeps/powerpc/power6/string-fza.h
new file mode 100644
index 0000000..1515b00
--- /dev/null
+++ b/sysdeps/powerpc/power6/string-fza.h
@@ -0,0 +1,65 @@
+/* string-fza.h -- zero byte detection; basics.  Power6 version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZA_H
+#define STRING_FZA_H 1
+
+#include <string-optype.h>
+
+/* This function returns 0xff for each byte that is
+   equal between X1 and X2.  */
+
+static inline op_t
+find_eq_all (op_t x1, op_t x2)
+{
+  op_t ret;
+  asm ("cmpb %0,%1,%2" : "=r"(ret) : "r"(x1), "r"(x2));
+  return ret;
+}
+
+/* This function returns 0xff for each byte that is zero in X.  */
+
+static inline op_t
+find_zero_all (op_t x)
+{
+  return find_eq_all (x, 0);
+}
+
+/* Identify zero bytes in X1 or equality between X1 and X2.  */
+
+static inline op_t
+find_zero_eq_all (op_t x1, op_t x2)
+{
+  return find_zero_all (x1) | find_eq_all (x1, x2);
+}
+
+/* Identify zero bytes in X1 or inequality between X1 and X2.  */
+
+static inline op_t
+find_zero_ne_all (op_t x1, op_t x2)
+{
+  return find_zero_all (x1) | ~find_eq_all (x1, x2);
+}
+
+/* Define the "inexact" versions in terms of the exact versions.  */
+#define find_zero_low		find_zero_all
+#define find_eq_low		find_eq_all
+#define find_zero_eq_low	find_zero_eq_all
+#define find_zero_ne_low	find_zero_ne_all
+
+#endif /* STRING_FZA_H */
diff --git a/sysdeps/powerpc/powerpc32/power6/string-fza.h b/sysdeps/powerpc/powerpc32/power6/string-fza.h
new file mode 100644
index 0000000..bb00d7c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power6/string-fza.h
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/power6/string-fza.h>
diff --git a/sysdeps/powerpc/powerpc64/power6/string-fza.h b/sysdeps/powerpc/powerpc64/power6/string-fza.h
new file mode 100644
index 0000000..bb00d7c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power6/string-fza.h
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/power6/string-fza.h>

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=55ddf7c70da6a8ae23507c41a34d00a127bc1308

commit 55ddf7c70da6a8ae23507c41a34d00a127bc1308
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:24:23 2017 -0200

    arm: Add string-fza.h
    
    While arm has the more important string functions in assembly,
    there are still a few generic routines used.
    
    Use the UQSUB8 insn for testing of zeros.
    
    	* sysdeps/arm/armv6t2/string-fza.h: New file.

diff --git a/sysdeps/arm/armv6t2/string-fza.h b/sysdeps/arm/armv6t2/string-fza.h
new file mode 100644
index 0000000..e46abb1
--- /dev/null
+++ b/sysdeps/arm/armv6t2/string-fza.h
@@ -0,0 +1,69 @@
+/* string-fza.h -- zero byte detection; basics.  ARM version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZA_H
+#define STRING_FZA_H 1
+
+#include <string-optype.h>
+
+/* This function returns at least one bit set within every byte
+   of X that is zero.  */
+
+static inline op_t
+find_zero_all (op_t x)
+{
+  /* Use unsigned saturated subtraction from 1 in each byte.
+     That leaves 1 for every byte that was zero.  */
+  op_t ret, ones = (op_t)-1 / 0xff;
+  asm ("uqsub8 %0,%1,%2" : "=r"(ret) : "r"(ones), "r"(x));
+  return ret;
+}
+
+/* Identify bytes that are equal between X1 and X2.  */
+
+static inline op_t
+find_eq_all (op_t x1, op_t x2)
+{
+  return find_zero_all (x1 ^ x2);
+}
+
+/* Identify zero bytes in X1 or equality between X1 and X2.  */
+
+static inline op_t
+find_zero_eq_all (op_t x1, op_t x2)
+{
+  return find_zero_all (x1) | find_zero_all (x1 ^ x2);
+}
+
+/* Identify zero bytes in X1 or inequality between X1 and X2.  */
+
+static inline op_t
+find_zero_ne_all (op_t x1, op_t x2)
+{
+  /* Make use of the fact that we'll already have ONES in a register.  */
+  op_t ones = (op_t)-1 / 0xff;
+  return find_zero_all (x1) | (find_zero_all (x1 ^ x2) ^ ones);
+}
+
+/* Define the "inexact" versions in terms of the exact versions.  */
+#define find_zero_low		find_zero_all
+#define find_eq_low		find_eq_all
+#define find_zero_eq_low	find_zero_eq_all
+#define find_zero_ne_low	find_zero_ne_all
+
+#endif /* STRING_FZA_H */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=bef385336624e51985182dc9401c702fdfc73817

commit bef385336624e51985182dc9401c702fdfc73817
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:23:27 2017 -0200

    alpha: Add string-fzb.h and string-fzi.h
    
    While alpha has the more important string functions in assembly,
    there are still a few for which the generic routines are used.
    
    Use the CMPBGE insn, via the builtin, for testing of zeros.  Use a
    simplified expansion of __builtin_ctz when the insn isn't available.
    
    	* sysdeps/alpha/string-fza.h: New file.
    	* sysdeps/alpha/string-fzb.h: New file.
    	* sysdeps/alpha/string-fzi.h: New file.

diff --git a/sysdeps/alpha/string-fzb.h b/sysdeps/alpha/string-fzb.h
new file mode 100644
index 0000000..13d6c9f
--- /dev/null
+++ b/sysdeps/alpha/string-fzb.h
@@ -0,0 +1,51 @@
+/* string-fzb.h -- zero byte detection; boolean.  Alpha version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZB_H
+#define STRING_FZB_H 1
+
+#include <string-optype.h>
+
+/* Note that since CMPBGE creates a bit mask rather than a byte mask,
+   we cannot simply provide a target-specific string-fza.h.  */
+
+/* Determine if any byte within X is zero.  This is a pure boolean test.  */
+
+static inline _Bool
+has_zero (op_t x)
+{
+  return __builtin_alpha_cmpbge (0, x) != 0;
+}
+
+/* Likewise, but for byte equality between X1 and X2.  */
+
+static inline _Bool
+has_eq (op_t x1, op_t x2)
+{
+  return has_zero (x1 ^ x2);
+}
+
+/* Likewise, but for zeros in X1 or equal bytes between X1 and X2.  */
+
+static inline _Bool
+has_zero_eq (op_t x1, op_t x2)
+{
+  return has_zero (x1) | has_eq (x1, x2);
+}
+
+#endif /* STRING_FZB_H */
diff --git a/sysdeps/alpha/string-fzi.h b/sysdeps/alpha/string-fzi.h
new file mode 100644
index 0000000..243a9e5
--- /dev/null
+++ b/sysdeps/alpha/string-fzi.h
@@ -0,0 +1,113 @@
+/* string-fzi.h -- zero byte detection; indices.  Alpha version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZI_H
+#define STRING_FZI_H
+
+#include <limits.h>
+#include <string-optype.h>
+
+/* Note that since CMPBGE creates a bit mask rather than a byte mask,
+   we cannot simply provide a target-specific string-fza.h.  */
+
+/* A subroutine for the index_zero functions.  Given a bitmask C,
+   return the index of the first bit set in memory order.  */
+
+static inline unsigned int
+index_first_ (unsigned long int c)
+{
+#ifdef __alpha_cix__
+  return __builtin_ctzl (c);
+#else
+  c = c & -c;
+  return (c & 0xf0 ? 4 : 0) + (c & 0xcc ? 2 : 0) + (c & 0xaa ? 1 : 0);
+#endif
+}
+
+/* Similarly, but return the (memory order) index of the last bit
+   that is non-zero.  Note that only the least 8 bits may be nonzero.  */
+
+static inline unsigned int
+index_last_ (unsigned long int x)
+{
+#ifdef __alpha_cix__
+  return __builtin_clzl (x) ^ 63;
+#else
+  unsigned r = 0;
+  if (x & 0xf0)
+    r += 4;
+  if (x & (0xc << r))
+    r += 2;
+  if (x & (0x2 << r))
+    r += 1;
+  return r;
+#endif
+}
+
+/* Given a word X that is known to contain a zero byte, return the
+   index of the first such within the word in memory order.  */
+
+static inline unsigned int
+index_first_zero (op_t x)
+{
+  return index_first_ (__builtin_alpha_cmpbge (0, x));
+}
+
+/* Similarly, but perform the test for byte equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_eq (op_t x1, op_t x2)
+{
+  return index_first_zero (x1 ^ x2);
+}
+
+/* Similarly, but perform the search for zero within X1 or
+   equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_zero_eq (op_t x1, op_t x2)
+{
+  return index_first_ (__builtin_alpha_cmpbge (0, x1)
+		       | __builtin_alpha_cmpbge (0, x1 ^ x2));
+}
+
+/* Similarly, but perform the search for zero within X1 or
+   inequality between X1 and X2.  */
+
+static inline unsigned int
+index_first_zero_ne (op_t x1, op_t x2)
+{
+  return index_first_ (__builtin_alpha_cmpbge (0, x1)
+		       | (__builtin_alpha_cmpbge (0, x1 ^ x2) ^ 0xFF));
+}
+
+/* Similarly, but search for the last zero within X.  */
+
+static inline unsigned int
+index_last_zero (op_t x)
+{
+  return index_last_ (__builtin_alpha_cmpbge (0, x));
+}
+
+static inline unsigned int
+index_last_eq (op_t x1, op_t x2)
+{
+  return index_last_zero (x1 ^ x2);
+}
+
+#endif /* STRING_FZI_H */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=1da9832f154b026451369da54a7860266e691c95

commit 1da9832f154b026451369da54a7860266e691c95
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:22:39 2017 -0200

    hppa: Add string-fzb.h and string-fzi.h
    
    Use UXOR,SBZ to test for a zero byte within a word.  While we can
    get semi-decent code out of asm-goto, we would do slightly better
    with a compiler builtin.
    
    For index_zero et al, sequential testing of bytes is less expensive than
    any tricks that involve a count-leading-zeros insn that we don't have.
    
    	* sysdeps/hppa/string-fza.h: New file.
    	* sysdeps/hppa/string-fzb.h: New file.
    	* sysdeps/hppa/string-fzi.h: New file.

diff --git a/sysdeps/hppa/string-fzb.h b/sysdeps/hppa/string-fzb.h
new file mode 100644
index 0000000..97f1b64
--- /dev/null
+++ b/sysdeps/hppa/string-fzb.h
@@ -0,0 +1,69 @@
+/* string-fzb.h -- zero byte detection, boolean.  HPPA version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZB_H
+#define STRING_FZB_H 1
+
+#include <string-optype.h>
+
+/* Determine if any byte within X is zero.  This is a pure boolean test.  */
+
+static inline _Bool
+has_zero (op_t x)
+{
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* It's more useful to expose a control transfer to the compiler
+     than to expose a proper boolean result.  */
+  asm goto ("uxor,sbz %%r0,%0,%%r0\n\t"
+	    "b,n %l1" : : "r"(x) : : nbz);
+  return 1;
+ nbz:
+  return 0;
+}
+
+/* Likewise, but for byte equality between X1 and X2.  */
+
+static inline _Bool
+has_eq (op_t x1, op_t x2)
+{
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  asm goto ("uxor,sbz %0,%1,%%r0\n\t"
+	    "b,n %l2" : : "r"(x1), "r"(x2) : : nbz);
+  return 1;
+ nbz:
+  return 0;
+}
+
+/* Likewise, but for zeros in X1 or equal bytes between X1 and X2.  */
+
+static inline _Bool
+has_zero_eq (op_t x1, op_t x2)
+{
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  asm goto ("uxor,sbz %%r0,%0,%%r0\n\t"
+	    "uxor,nbz %0,%1,%%r0\n\t"
+	    "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
+  return 0;
+ sbz:
+  return 1;
+}
+
+#endif /* STRING_FZB_H */
diff --git a/sysdeps/hppa/string-fzi.h b/sysdeps/hppa/string-fzi.h
new file mode 100644
index 0000000..22bd8ac
--- /dev/null
+++ b/sysdeps/hppa/string-fzi.h
@@ -0,0 +1,135 @@
+/* string-fzi.h -- zero byte detection; indexes.  HPPA version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZI_H
+#define STRING_FZI_H 1
+
+#include <string-optype.h>
+
+/* Given a word X that is known to contain a zero byte, return the
+   index of the first such within the word in memory order.  */
+
+static inline unsigned int
+index_first_zero (op_t x)
+{
+  unsigned int ret;
+
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no clz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  asm ("extrw,u,<> %1,23,8,%%r0\n\t"
+       "ldi 2,%0\n\t"
+       "extrw,u,<> %1,15,8,%%r0\n\t"
+       "ldi 1,%0\n\t"
+       "extrw,u,<> %1,7,8,%%r0\n\t"
+       "ldi 0,%0"
+       : "=r"(ret) : "r"(x), "0"(3));
+
+  return ret;
+}
+
+/* Similarly, but perform the search for byte equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_eq (op_t x1, op_t x2)
+{
+  return index_first_zero (x1 ^ x2);
+}
+
+/* Similarly, but perform the search for zero within X1 or
+   equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_zero_eq (op_t x1, op_t x2)
+{
+  unsigned int ret;
+
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no clz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  asm ("extrw,u,= %1,23,8,%%r0\n\t"
+       "extrw,u,<> %2,23,8,%%r0\n\t"
+       "ldi 2,%0\n\t"
+       "extrw,u,= %1,15,8,%%r0\n\t"
+       "extrw,u,<> %2,15,8,%%r0\n\t"
+       "ldi 1,%0\n\t"
+       "extrw,u,= %1,7,8,%%r0\n\t"
+       "extrw,u,<> %2,7,8,%%r0\n\t"
+       "ldi 0,%0"
+       : "=r"(ret) : "r"(x1), "r"(x1 ^ x2), "0"(3));
+
+  return ret;
+}
+
+/* Similarly, but perform the search for zero within X1 or 
+   inequality between X1 and X2. */
+
+static inline unsigned int
+index_first_zero_ne (op_t x1, op_t x2)
+{
+  unsigned int ret;
+
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no clz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  asm ("extrw,u,<> %2,23,8,%%r0\n\t"
+       "extrw,u,<> %1,23,8,%%r0\n\t"
+       "ldi 2,%0\n\t"
+       "extrw,u,<> %2,15,8,%%r0\n\t"
+       "extrw,u,<> %1,15,8,%%r0\n\t"
+       "ldi 1,%0\n\t"
+       "extrw,u,<> %2,7,8,%%r0\n\t"
+       "extrw,u,<> %1,7,8,%%r0\n\t"
+       "ldi 0,%0"
+       : "=r"(ret) : "r"(x1), "r"(x1 ^ x2), "0"(3));
+
+  return ret;
+}
+
+/* Similarly, but search for the last zero within X.  */
+
+static inline unsigned int
+index_last_zero (op_t x)
+{
+  unsigned int ret;
+
+  _Static_assert (sizeof (op_t) == 4, "64-bit not supported");
+
+  /* Since we have no ctz insn, direct tests of the bytes is faster
+     than loading up the constants to do the masking.  */
+  asm ("extrw,u,<> %1,15,8,%%r0\n\t"
+       "ldi 1,%0\n\t"
+       "extrw,u,<> %1,23,8,%%r0\n\t"
+       "ldi 2,%0\n\t"
+       "extrw,u,<> %1,31,8,%%r0\n\t"
+       "ldi 3,%0"
+       : "=r"(ret) : "r"(x), "0"(0));
+
+  return ret;
+}
+
+static inline unsigned int
+index_last_eq (op_t x1, op_t x2)
+{
+  return index_last_zero (x1 ^ x2);
+}
+
+#endif /* STRING_FZI_H */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=6396dd6f4ead1ae41aa2e07103f0a68001f3e208

commit 6396dd6f4ead1ae41aa2e07103f0a68001f3e208
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:22:02 2017 -0200

    hppa: Add memcopy.h
    
    GCC's combine pass cannot merge (x >> c | y << (32 - c)) into a
    double-word shift unless (1) the subtract is in the same basic block
    and (2) the result of the subtract is used exactly once.  Neither
    condition is true for any use of MERGE.
    
    By forcing the use of a double-word shift, we not only reduce
    contention on SAR, but also allow the setting of SAR to be hoisted
    outside of a loop.
    
    	* sysdeps/hppa/memcopy.h: New file.

diff --git a/sysdeps/hppa/memcopy.h b/sysdeps/hppa/memcopy.h
new file mode 100644
index 0000000..4076b8b
--- /dev/null
+++ b/sysdeps/hppa/memcopy.h
@@ -0,0 +1,44 @@
+/* memcopy.h -- definitions for memory copy functions, PA-RISC version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/generic/memcopy.h>
+
+/* Use a single double-word shift instead of two shifts and an ior.
+   If the uses of MERGE were close to the computation of shl/shr,
+   the compiler might have been able to create this itself.
+   But instead that computation is well separated.
+
+   Using an inline function instead of a macro is the easiest way
+   to ensure that the types are correct.  */
+
+#undef MERGE
+
+extern void link_error(void);
+
+static inline op_t
+MERGE(op_t w0, int shl, op_t w1, int shr)
+{
+  op_t res;
+  if (OPSIZ == 4)
+    asm("shrpw %1,%2,%%sar,%0" : "=r"(res) : "r"(w0), "r"(w1), "q"(shr));
+  else if (OPSIZ == 8)
+    asm("shrpd %1,%2,%%sar,%0" : "=r"(res) : "r"(w0), "r"(w1), "q"(shr));
+  else
+    link_error(), res = 0;
+  return res;
+}

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=903270e20262487e0dbf1a12d36db172787ef2da

commit 903270e20262487e0dbf1a12d36db172787ef2da
Author: Adhemerval Zanella <adhemerval.zanella@linaro.com>
Date:   Wed Mar 8 16:56:17 2017 +0100

    Improve generic strcpy
    
    New generic implementation tries to use word operations along with
    the new string-fz{b,i} functions even for inputs with different
    alignments (which still uses aligned access plus merge operation
    to get a correct word by word comparison).
    
    	* string/strcpy.c: Rewrite using memcopy.h, string-fzb.h,
            string-fzi.h.

diff --git a/string/strcpy.c b/string/strcpy.c
index a4cce89..358b1b1 100644
--- a/string/strcpy.c
+++ b/string/strcpy.c
@@ -15,8 +15,13 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <stddef.h>
 #include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-extbyte.h>
+#include <memcopy.h>
 
 #undef strcpy
 
@@ -28,6 +33,106 @@
 char *
 STRCPY (char *dest, const char *src)
 {
-  return memcpy (dest, src, strlen (src) + 1);
+  char *dst = dest;
+  const op_t *xs;
+  op_t *xd;
+  op_t ws;
+
+#if _STRING_ARCH_unaligned
+  /* For architectures which support unaligned memory operations, first
+     align the source pointer, then read op_t bytes at a time until a zero
+     is found, writing unaligned to the destination.  */
+  uintptr_t n = -(uintptr_t) src % sizeof (op_t);
+  for (uintptr_t i = 0; i < n; ++i)
+    {
+      unsigned c = *src++;
+      *dst++ = c;
+      if (c == '\0')
+	return dest;
+    }
+  xs = (const op_t *) src;
+  ws = *xs++;
+  xd = (op_t *) dst;
+  while (!has_zero (ws))
+    {
+      *xd++ = ws;
+      ws = *xs++;
+    }
+#else
+  /* For architectures which only support aligned accesses, first align
+     the destination pointer.  */
+  uintptr_t n = -(uintptr_t) dst % sizeof (op_t);
+  for (uintptr_t i = 0; i < n; ++i)
+    {
+      unsigned c = *src++;
+      *dst++ = c;
+      if (c == '\0')
+	return dest;
+    }
+  xd = (op_t *) dst;
+
+  /* Destination is aligned to op_t while source might be not.  */
+  uintptr_t ofs = (uintptr_t) src % sizeof (op_t);
+  if (ofs == 0)
+    {
+      /* Aligned loop.  If a zero is found, exit to copy the remaining
+	 bytes.  */
+      xs = (const op_t *) src;
+
+      ws = *xs++;
+      while (!has_zero (ws))
+	{
+	  *xd++ = ws;
+	  ws = *xs++;
+	}
+    }
+  else
+    {
+      /* Unaligned loop: align the source pointer and mask off the
+	 undesirable bytes which are not part of the string.  */
+      op_t wsa, wsb;
+      uintptr_t sh_1, sh_2;
+
+      xs = (const op_t *)(src - ofs);
+      wsa = *xs++;
+      sh_1 = ofs * CHAR_BIT;
+      sh_2 = sizeof(op_t) * CHAR_BIT - sh_1;
+
+      /* Align the first partial op_t from source, with 0xff for the rest
+	 of the bytes so that we can also apply the has_zero test to see if we
+         have already reached EOS.  If we have, then we can simply fall
+         through to the final byte copies.  */
+      ws = MERGE (wsa, sh_1, (op_t)-1, sh_2);
+      if (!has_zero (ws))
+	{
+	  while (1)
+	    {
+	      wsb = *xs++;
+	      ws = MERGE (wsa, sh_1, wsb, sh_2);
+	      if (has_zero (wsb))
+		break;
+	      *xd++ = ws;
+	      wsa = wsb;
+	    }
+
+	  /* WS may contain bytes not yet written to the destination.
+	     Write them down and merge with the op_t containing the EOS
+	     byte. */
+	  if (!has_zero (ws))
+	    {
+	      *xd++ = ws;
+	      ws = MERGE (wsb, sh_1, ws, sh_2);
+	    }
+	}
+    }
+#endif
+
+  /* Just copy the final bytes from op_t.  */
+  dst = (char *) xd;
+  uintptr_t fz = index_first_zero (ws);
+  for (uintptr_t i = 0; i < fz + 1; i++)
+    *dst++ = extractbyte (ws, i);
+
+  return dest;
 }
 libc_hidden_builtin_def (strcpy)
diff --git a/string/test-strcpy.c b/string/test-strcpy.c
index 2a1bf93..fa03c73 100644
--- a/string/test-strcpy.c
+++ b/string/test-strcpy.c
@@ -207,7 +207,7 @@ do_random_tests (void)
 int
 test_main (void)
 {
-  size_t i;
+  size_t i, j;
 
   test_init ();
 
@@ -222,12 +222,26 @@ test_main (void)
       do_test (0, 0, i, BIG_CHAR);
       do_test (0, i, i, SMALL_CHAR);
       do_test (i, 0, i, BIG_CHAR);
+
+      for (j = 1; j < 16; ++j)
+	{
+	  do_test (0, 0, i + j, SMALL_CHAR);
+	  do_test (0, 0, i + j, BIG_CHAR);
+	  do_test (0, i, i + j, SMALL_CHAR);
+	  do_test (i, 0, i + j, BIG_CHAR);
+	}
     }
 
   for (i = 1; i < 8; ++i)
     {
       do_test (0, 0, 8 << i, SMALL_CHAR);
       do_test (8 - i, 2 * i, 8 << i, SMALL_CHAR);
+
+      for (j = 1; j < 8; ++j)
+	{
+	  do_test (0, 0, (8 << i) + j, SMALL_CHAR);
+	  do_test (8 - i, 2 * i, (8 << i) + j, SMALL_CHAR);
+	}
     }
 
   for (i = 1; i < 8; ++i)
@@ -236,6 +250,14 @@ test_main (void)
       do_test (2 * i, i, 8 << i, BIG_CHAR);
       do_test (i, i, 8 << i, SMALL_CHAR);
       do_test (i, i, 8 << i, BIG_CHAR);
+
+      for (j = 1; j < 8; ++j)
+	{
+	  do_test (i, 2 * i, (8 << i) + j, SMALL_CHAR);
+	  do_test (2 * i, i, (8 << i) + j, BIG_CHAR);
+	  do_test (i, i, (8 << i) + j, SMALL_CHAR);
+	  do_test (i, i, (8 << i) + j, BIG_CHAR);
+	}
     }
 
   do_random_tests ();

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d8c455b1a050a8d9806b568b1d064fa46e12f634

commit d8c455b1a050a8d9806b568b1d064fa46e12f634
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:21:26 2017 -0200

    Improve generic strcmp
    
    The new generic implementation uses word operations along with
    the new string-fz{b,i} functions even for inputs with different
    alignments (it still uses aligned accesses plus a merge operation
    to get a correct word-by-word comparison).
    
    	* string/strcmp.c: Rewrite using memcopy.h, string-fzb.h,
    	string-fzi.h.

diff --git a/string/strcmp.c b/string/strcmp.c
index e198d19..731ca31 100644
--- a/string/strcmp.c
+++ b/string/strcmp.c
@@ -16,6 +16,12 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-extbyte.h>
+#include <memcopy.h>
 
 #undef strcmp
 
@@ -29,19 +35,91 @@
 int
 STRCMP (const char *p1, const char *p2)
 {
-  const unsigned char *s1 = (const unsigned char *) p1;
-  const unsigned char *s2 = (const unsigned char *) p2;
+  const op_t *x1, *x2;
+  op_t w1, w2;
   unsigned char c1, c2;
+  uintptr_t i, n, ofs;
+  int diff;
 
-  do
+  /* Handle the unaligned bytes of p1 first.  */
+  n = -(uintptr_t)p1 % sizeof(op_t);
+  for (i = 0; i < n; ++i)
     {
-      c1 = (unsigned char) *s1++;
-      c2 = (unsigned char) *s2++;
-      if (c1 == '\0')
-	return c1 - c2;
+      c1 = *p1++;
+      c2 = *p2++;
+      diff = c1 - c2;
+      if (c1 == '\0' || diff)
+	return diff;
     }
-  while (c1 == c2);
 
+  /* P1 is now aligned to op_t.  P2 may or may not be.  */
+  x1 = (const op_t *)p1;
+  w1 = *x1++;
+  ofs = (uintptr_t)p2 % sizeof(op_t);
+  if (ofs == 0)
+    {
+      x2 = (const op_t *)p2;
+      w2 = *x2++;
+      /* Aligned loop.  If a difference is found, exit to compare the
+         bytes.  Else if a zero is found we have equal strings.  */
+      while (w1 == w2)
+	{
+	  if (has_zero (w1))
+	    return 0;
+          w1 = *x1++;
+          w2 = *x2++;
+	}
+    }
+  else
+    {
+      op_t w2a, w2b;
+      uintptr_t sh_1, sh_2;
+
+      x2 = (const op_t *)(p2 - ofs);
+      w2a = *x2++;
+      sh_1 = ofs * CHAR_BIT;
+      sh_2 = sizeof(op_t) * CHAR_BIT - sh_1;
+
+      /* Align the first partial of P2, with 0xff for the rest of the
+         bytes so that we can also apply the has_zero test to see if we
+         have already reached EOS.  If we have, then we can simply fall
+         through to the final comparison.  */
+      w2 = MERGE (w2a, sh_1, (op_t)-1, sh_2);
+      if (!has_zero (w2))
+	{
+	  /* Unaligned loop.  The invariant is that W2B, which is "ahead"
+             of W1, does not contain end-of-string.  Therefore it is safe
+             (and necessary) to read another word from each while we do
+             not have a difference.  */
+	  while (1)
+	    {
+	      w2b = *x2++;
+	      w2 = MERGE (w2a, sh_1, w2b, sh_2);
+	      if (w1 != w2)
+		goto final_cmp;
+	      if (has_zero (w2b))
+		break;
+	      w1 = *x1++;
+	      w2a = w2b;
+	    }
+
+	  /* Zero found in the second partial of P2.  If we had EOS
+	     in the aligned word, we have equality.  */
+	  if (has_zero (w1))
+	    return 0;
+
+          /* Load the final word of P1 and align the final partial of P2.  */
+	  w1 = *x1++;
+          w2 = MERGE (w2b, sh_1, 0, sh_2);
+	}
+    }
+
+ final_cmp:
+  /* We have two aligned words of data.  */
+  i = index_first_zero_ne (w1, w2);
+  c1 = extractbyte (w1, i);
+  c2 = extractbyte (w2, i);
   return c1 - c2;
 }
+
 libc_hidden_builtin_def (strcmp)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2dba7f25afce8fe2f38ce333bfd6506105b89633

commit 2dba7f25afce8fe2f38ce333bfd6506105b89633
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Thu Feb 16 16:21:03 2017 -0200

    Improve generic strnlen
    
    With an optimized memchr, the new strnlen implementation simply calls
    memchr and adjusts the resulting pointer value.
    
    	[BZ #5806]
    	* string/strnlen.c: Rewrite in terms of memchr.

diff --git a/string/strnlen.c b/string/strnlen.c
index c2ce1eb..a3ec6af 100644
--- a/string/strnlen.c
+++ b/string/strnlen.c
@@ -21,146 +21,21 @@
    not, see <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
-#include <stdlib.h>
 
 /* Find the length of S, but scan at most MAXLEN characters.  If no
    '\0' terminator is found in that many characters, return MAXLEN.  */
 
-#ifdef STRNLEN
-# define __strnlen STRNLEN
+#ifndef STRNLEN
+# define STRNLEN __strnlen
 #endif
 
 size_t
-__strnlen (const char *str, size_t maxlen)
+STRNLEN (const char *str, size_t maxlen)
 {
-  const char *char_ptr, *end_ptr = str + maxlen;
-  const unsigned long int *longword_ptr;
-  unsigned long int longword, himagic, lomagic;
-
-  if (maxlen == 0)
-    return 0;
-
-  if (__glibc_unlikely (end_ptr < str))
-    end_ptr = (const char *) ~0UL;
-
-  /* Handle the first few characters by reading one character at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary.  */
-  for (char_ptr = str; ((unsigned long int) char_ptr
-			& (sizeof (longword) - 1)) != 0;
-       ++char_ptr)
-    if (*char_ptr == '\0')
-      {
-	if (char_ptr > end_ptr)
-	  char_ptr = end_ptr;
-	return char_ptr - str;
-      }
-
-  /* All these elucidatory comments refer to 4-byte longwords,
-     but the theory applies equally well to 8-byte longwords.  */
-
-  longword_ptr = (unsigned long int *) char_ptr;
-
-  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
-     the "holes."  Note that there is a hole just to the left of
-     each byte, with an extra at the end:
-
-     bits:  01111110 11111110 11111110 11111111
-     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
-
-     The 1-bits make sure that carries propagate to the next 0-bit.
-     The 0-bits provide holes for carries to fall into.  */
-  himagic = 0x80808080L;
-  lomagic = 0x01010101L;
-  if (sizeof (longword) > 4)
-    {
-      /* 64-bit version of the magic.  */
-      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
-      himagic = ((himagic << 16) << 16) | himagic;
-      lomagic = ((lomagic << 16) << 16) | lomagic;
-    }
-  if (sizeof (longword) > 8)
-    abort ();
-
-  /* Instead of the traditional loop which tests each character,
-     we will test a longword at a time.  The tricky part is testing
-     if *any of the four* bytes in the longword in question are zero.  */
-  while (longword_ptr < (unsigned long int *) end_ptr)
-    {
-      /* We tentatively exit the loop if adding MAGIC_BITS to
-	 LONGWORD fails to change any of the hole bits of LONGWORD.
-
-	 1) Is this safe?  Will it catch all the zero bytes?
-	 Suppose there is a byte with all zeros.  Any carry bits
-	 propagating from its left will fall into the hole at its
-	 least significant bit and stop.  Since there will be no
-	 carry from its most significant bit, the LSB of the
-	 byte to the left will be unchanged, and the zero will be
-	 detected.
-
-	 2) Is this worthwhile?  Will it ignore everything except
-	 zero bytes?  Suppose every byte of LONGWORD has a bit set
-	 somewhere.  There will be a carry into bit 8.  If bit 8
-	 is set, this will carry into bit 16.  If bit 8 is clear,
-	 one of bits 9-15 must be set, so there will be a carry
-	 into bit 16.  Similarly, there will be a carry into bit
-	 24.  If one of bits 24-30 is set, there will be a carry
-	 into bit 31, so all of the hole bits will be changed.
-
-	 The one misfire occurs when bits 24-30 are clear and bit
-	 31 is set; in this case, the hole at bit 31 is not
-	 changed.  If we had access to the processor carry flag,
-	 we could close this loophole by putting the fourth hole
-	 at bit 32!
-
-	 So it ignores everything except 128's, when they're aligned
-	 properly.  */
-
-      longword = *longword_ptr++;
-
-      if ((longword - lomagic) & himagic)
-	{
-	  /* Which of the bytes was the zero?  If none of them were, it was
-	     a misfire; continue the search.  */
-
-	  const char *cp = (const char *) (longword_ptr - 1);
-
-	  char_ptr = cp;
-	  if (cp[0] == 0)
-	    break;
-	  char_ptr = cp + 1;
-	  if (cp[1] == 0)
-	    break;
-	  char_ptr = cp + 2;
-	  if (cp[2] == 0)
-	    break;
-	  char_ptr = cp + 3;
-	  if (cp[3] == 0)
-	    break;
-	  if (sizeof (longword) > 4)
-	    {
-	      char_ptr = cp + 4;
-	      if (cp[4] == 0)
-		break;
-	      char_ptr = cp + 5;
-	      if (cp[5] == 0)
-		break;
-	      char_ptr = cp + 6;
-	      if (cp[6] == 0)
-		break;
-	      char_ptr = cp + 7;
-	      if (cp[7] == 0)
-		break;
-	    }
-	}
-      char_ptr = end_ptr;
-    }
-
-  if (char_ptr > end_ptr)
-    char_ptr = end_ptr;
-  return char_ptr - str;
+  const char *found = memchr (str, '\0', maxlen);
+  return found ? found - str : maxlen;
 }
-#ifndef STRNLEN
-libc_hidden_def (__strnlen)
+
 weak_alias (__strnlen, strnlen)
-#endif
+libc_hidden_def (__strnlen)
 libc_hidden_def (strnlen)
diff --git a/sysdeps/i386/i686/multiarch/strnlen-c.c b/sysdeps/i386/i686/multiarch/strnlen-c.c
index 351e939..bfbf811 100644
--- a/sysdeps/i386/i686/multiarch/strnlen-c.c
+++ b/sysdeps/i386/i686/multiarch/strnlen-c.c
@@ -1,10 +1,15 @@
 #define STRNLEN  __strnlen_ia32
+#undef weak_alias
+#define weak_alias(a,b)
+#undef libc_hidden_def
+#define libc_hidden_def(a)
+
+#include <string/strnlen.c>
+
 #ifdef SHARED
-# undef libc_hidden_def
-# define libc_hidden_def(name)  \
-    __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32); \
-    strong_alias (__strnlen_ia32, __strnlen_ia32_1); \
-    __hidden_ver1 (__strnlen_ia32_1, __GI___strnlen, __strnlen_ia32_1);
+/* Alias for internal symbol to avoid PLT generation; it redirects the
+   libc_hidden_def (__strnlen/strnlen) to the default implementation.  */
+__hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32);
+strong_alias (__strnlen_ia32, __strnlen_ia32_1);
+__hidden_ver1 (__strnlen_ia32_1, __GI___strnlen, __strnlen_ia32_1);
 #endif
-
-#include "string/strnlen.c"
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c b/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c
index df940d3..e2ccd21 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c
@@ -17,12 +17,17 @@
    <http://www.gnu.org/licenses/>.  */
 
 #define STRNLEN  __strnlen_ppc
-#ifdef SHARED
-# undef libc_hidden_def
-# define libc_hidden_def(name)  \
-    __hidden_ver1 (__strnlen_ppc, __GI_strnlen, __strnlen_ppc); \
-    strong_alias (__strnlen_ppc, __strnlen_ppc_1); \
-    __hidden_ver1 (__strnlen_ppc_1, __GI___strnlen, __strnlen_ppc_1);
-#endif
+#undef weak_alias
+#define weak_alias(a,b)
+#undef libc_hidden_def
+#define libc_hidden_def(a)
 
 #include <string/strnlen.c>
+
+#ifdef SHARED
+/* Alias for internal symbol to avoid PLT generation; it redirects the
+   libc_hidden_def (__strnlen/strnlen) to the default implementation.  */
+__hidden_ver1 (__strnlen_ppc, __GI_strnlen, __strnlen_ppc); \
+strong_alias (__strnlen_ppc, __strnlen_ppc_1); \
+__hidden_ver1 (__strnlen_ppc_1, __GI___strnlen, __strnlen_ppc_1);
+#endif
diff --git a/sysdeps/s390/multiarch/strnlen-c.c b/sysdeps/s390/multiarch/strnlen-c.c
index 353e83e..f77f59d 100644
--- a/sysdeps/s390/multiarch/strnlen-c.c
+++ b/sysdeps/s390/multiarch/strnlen-c.c
@@ -18,13 +18,19 @@
 
 #if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
 # define STRNLEN  __strnlen_c
+# undef weak_alias
+# define weak_alias(a,b)
+# undef libc_hidden_def
+# define libc_hidden_def(a)
+
+# include <string/strnlen.c>
+
 # ifdef SHARED
-#  undef libc_hidden_def
-#  define libc_hidden_def(name)					\
-  __hidden_ver1 (__strnlen_c, __GI_strnlen, __strnlen_c);	\
-  strong_alias (__strnlen_c, __strnlen_c_1);			\
-  __hidden_ver1 (__strnlen_c_1, __GI___strnlen, __strnlen_c_1);
+/* Alias for internal symbol to avoid PLT generation; it redirects the
+   libc_hidden_def (__strnlen/strnlen) to the default implementation.  */
+__hidden_ver1 (__strnlen_c, __GI_strnlen, __strnlen_c);
+strong_alias (__strnlen_c, __strnlen_c_1);
+__hidden_ver1 (__strnlen_c_1, __GI___strnlen, __strnlen_c_1);
 # endif /* SHARED */
 
-# include <string/strnlen.c>
 #endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=445306b7ebb5b544509313edde654315fc34c8a3

commit 445306b7ebb5b544509313edde654315fc34c8a3
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:20:35 2017 -0200

    Improve generic memrchr
    
    The new algorithm has the following key difference:
    
      - Use string-fz{b,i} functions.
    
    	[BZ #5806]
    	* string/memrchr.c: Use string-fzb.h, string-fzi.h.

diff --git a/string/memrchr.c b/string/memrchr.c
index 191b89a..5ae9c81 100644
--- a/string/memrchr.c
+++ b/string/memrchr.c
@@ -21,177 +21,64 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <stdlib.h>
-
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if defined _LIBC
-# include <string.h>
-# include <memcopy.h>
-#endif
-
-#if defined HAVE_LIMITS_H || defined _LIBC
-# include <limits.h>
-#endif
-
-#define LONG_MAX_32_BITS 2147483647
-
-#ifndef LONG_MAX
-# define LONG_MAX LONG_MAX_32_BITS
-#endif
-
-#include <sys/types.h>
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-opthr.h>
+#include <string-maskoff.h>
 
 #undef __memrchr
 #undef memrchr
 
-#ifndef weak_alias
-# define __memrchr memrchr
+#ifndef MEMRCHR
+# define MEMRCHR __memrchr
 #endif
 
-/* Search no more than N bytes of S for C.  */
 void *
-#ifndef MEMRCHR
-__memrchr
-#else
-MEMRCHR
-#endif
-     (const void *s, int c_in, size_t n)
+MEMRCHR (const void *s, int c_in, size_t n)
 {
-  const unsigned char *char_ptr;
-  const unsigned long int *longword_ptr;
-  unsigned long int longword, magic_bits, charmask;
-  unsigned char c;
-
-  c = (unsigned char) c_in;
+  uintptr_t s_int = (uintptr_t) s;
+  uintptr_t lbyte_int = s_int + n;
 
   /* Handle the last few characters by reading one character at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary.  */
-  for (char_ptr = (const unsigned char *) s + n;
-       n > 0 && ((unsigned long int) char_ptr
-		 & (sizeof (longword) - 1)) != 0;
-       --n)
-    if (*--char_ptr == c)
+     Do this until CHAR_PTR is aligned on a word boundary, or
+     the entirety of small inputs.  */
+  const unsigned char *char_ptr = (const unsigned char *) lbyte_int;
+  size_t align = lbyte_int % sizeof (op_t);
+  if (n < OP_T_THRES || align > n)
+    align = n;
+  for (size_t i = 0; i < align; ++i)
+    if (*--char_ptr == c_in)
       return (void *) char_ptr;
 
-  /* All these elucidatory comments refer to 4-byte longwords,
-     but the theory applies equally well to 8-byte longwords.  */
-
-  longword_ptr = (const unsigned long int *) char_ptr;
-
-  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
-     the "holes."  Note that there is a hole just to the left of
-     each byte, with an extra at the end:
-
-     bits:  01111110 11111110 11111110 11111111
-     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
-
-     The 1-bits make sure that carries propagate to the next 0-bit.
-     The 0-bits provide holes for carries to fall into.  */
-  magic_bits = -1;
-  magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1;
+  const op_t *word_ptr = (const op_t *) char_ptr;
+  n -= align;
+  if (__glibc_unlikely (n == 0))
+    return NULL;
 
-  /* Set up a longword, each of whose bytes is C.  */
-  charmask = c | (c << 8);
-  charmask |= charmask << 16;
-#if LONG_MAX > LONG_MAX_32_BITS
-  charmask |= charmask << 32;
-#endif
-
-  /* Instead of the traditional loop which tests each character,
-     we will test a longword at a time.  The tricky part is testing
-     if *any of the four* bytes in the longword in question are zero.  */
-  while (n >= sizeof (longword))
-    {
-      /* We tentatively exit the loop if adding MAGIC_BITS to
-	 LONGWORD fails to change any of the hole bits of LONGWORD.
-
-	 1) Is this safe?  Will it catch all the zero bytes?
-	 Suppose there is a byte with all zeros.  Any carry bits
-	 propagating from its left will fall into the hole at its
-	 least significant bit and stop.  Since there will be no
-	 carry from its most significant bit, the LSB of the
-	 byte to the left will be unchanged, and the zero will be
-	 detected.
-
-	 2) Is this worthwhile?  Will it ignore everything except
-	 zero bytes?  Suppose every byte of LONGWORD has a bit set
-	 somewhere.  There will be a carry into bit 8.  If bit 8
-	 is set, this will carry into bit 16.  If bit 8 is clear,
-	 one of bits 9-15 must be set, so there will be a carry
-	 into bit 16.  Similarly, there will be a carry into bit
-	 24.  If one of bits 24-30 is set, there will be a carry
-	 into bit 31, so all of the hole bits will be changed.
-
-	 The one misfire occurs when bits 24-30 are clear and bit
-	 31 is set; in this case, the hole at bit 31 is not
-	 changed.  If we had access to the processor carry flag,
-	 we could close this loophole by putting the fourth hole
-	 at bit 32!
-
-	 So it ignores everything except 128's, when they're aligned
-	 properly.
-
-	 3) But wait!  Aren't we looking for C, not zero?
-	 Good point.  So what we do is XOR LONGWORD with a longword,
-	 each of whose bytes is C.  This turns each byte that is C
-	 into a zero.  */
-
-      longword = *--longword_ptr ^ charmask;
+  /* Compute the address of the word containing the initial byte. */
+  const op_t *lword = (const op_t *) (s_int & -sizeof (op_t));
 
-      /* Add MAGIC_BITS to LONGWORD.  */
-      if ((((longword + magic_bits)
+  /* Set up a word, each of whose bytes is C.  */
+  op_t repeated_c = repeat_bytes (c_in);
 
-	    /* Set those bits that were unchanged by the addition.  */
-	    ^ ~longword)
+  char *ret;
+  op_t word;
 
-	   /* Look at only the hole bits.  If any of the hole bits
-	      are unchanged, most likely one of the bytes was a
-	      zero.  */
-	   & ~magic_bits) != 0)
-	{
-	  /* Which of the bytes was C?  If none of them were, it was
-	     a misfire; continue the search.  */
-
-	  const unsigned char *cp = (const unsigned char *) longword_ptr;
-
-#if LONG_MAX > 2147483647
-	  if (cp[7] == c)
-	    return (void *) &cp[7];
-	  if (cp[6] == c)
-	    return (void *) &cp[6];
-	  if (cp[5] == c)
-	    return (void *) &cp[5];
-	  if (cp[4] == c)
-	    return (void *) &cp[4];
-#endif
-	  if (cp[3] == c)
-	    return (void *) &cp[3];
-	  if (cp[2] == c)
-	    return (void *) &cp[2];
-	  if (cp[1] == c)
-	    return (void *) &cp[1];
-	  if (cp[0] == c)
-	    return (void *) cp;
-	}
-
-      n -= sizeof (longword);
-    }
-
-  char_ptr = (const unsigned char *) longword_ptr;
-
-  while (n-- > 0)
+  while (word_ptr != lword)
     {
-      if (*--char_ptr == c)
-	return (void *) char_ptr;
+      word = *--word_ptr;
+      if (has_eq (word, repeated_c))
+	goto found;
     }
+  return NULL;
 
-  return 0;
+found:
+  /* We found a match, but it might be in a byte before the start
+     of the array.  */
+  ret = (char *) word_ptr + index_last_eq (word, repeated_c);
+  return (ret >= (char*) s) ? ret : NULL;
 }
-#ifndef MEMRCHR
-# ifdef weak_alias
 weak_alias (__memrchr, memrchr)
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c
index ef7bbbe..0e0f30e 100644
--- a/sysdeps/i386/i686/multiarch/memrchr-c.c
+++ b/sysdeps/i386/i686/multiarch/memrchr-c.c
@@ -1,5 +1,7 @@
 #if IS_IN (libc)
 # define MEMRCHR  __memrchr_ia32
+# undef weak_alias
+# define weak_alias(a, b)
 # include <string.h>
 extern void *__memrchr_ia32 (const void *, int, size_t);
 #endif
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c b/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c
index 8f9f279..2a1b11e 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c
@@ -18,6 +18,8 @@
 
 #if IS_IN (libc)
 # define MEMRCHR  __memrchr_ppc
+# undef weak_alias
+# define weak_alias(a,b)
 # include <string.h>
 extern void *__memrchr_ppc (const void *, int, size_t);
 #endif
diff --git a/sysdeps/s390/multiarch/memrchr-c.c b/sysdeps/s390/multiarch/memrchr-c.c
index 1e3c914..ebf2f98 100644
--- a/sysdeps/s390/multiarch/memrchr-c.c
+++ b/sysdeps/s390/multiarch/memrchr-c.c
@@ -18,6 +18,8 @@
 
 #if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
 # define MEMRCHR  __memrchr_c
+# undef weak_alias
+# define weak_alias(a, b)
 
 # include <string.h>
 extern __typeof (__memrchr) __memrchr_c;

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=231a8739e99c60c6ba0804b22e18093c402d0b03

commit 231a8739e99c60c6ba0804b22e18093c402d0b03
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:19:40 2017 -0200

    Improve generic strchrnul
    
    The new algorithm has the following key differences:
    
      - Reads the first aligned word containing the start of the string
        and uses the string-maskoff functions to remove unwanted data.
        This strategy follows the assembly-optimized implementations for
        aarch64, powerpc and tile.
    
      - Use string-fz{b,i} functions.
    
    	[BZ #5806]
    	* string/strchrnul.c: Use string-fzb.h, string-fzi.h.

diff --git a/string/strchrnul.c b/string/strchrnul.c
index 5a17602..beeab88 100644
--- a/string/strchrnul.c
+++ b/string/strchrnul.c
@@ -21,8 +21,12 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
-#include <memcopy.h>
 #include <stdlib.h>
+#include <stdint.h>
+#include <string-fza.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-maskoff.h>
 
 #undef __strchrnul
 #undef strchrnul
@@ -33,134 +37,34 @@
 
 /* Find the first occurrence of C in S or the final NUL byte.  */
 char *
-STRCHRNUL (const char *s, int c_in)
+STRCHRNUL (const char *str, int c_in)
 {
-  const unsigned char *char_ptr;
-  const unsigned long int *longword_ptr;
-  unsigned long int longword, magic_bits, charmask;
-  unsigned char c;
+  const op_t *word_ptr;
+  op_t found, word;
 
-  c = (unsigned char) c_in;
+  /* Set up a word, each of whose bytes is C.  */
+  op_t repeated_c = repeat_bytes (c_in);
 
-  /* Handle the first few characters by reading one character at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary.  */
-  for (char_ptr = (const unsigned char *) s;
-       ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
-       ++char_ptr)
-    if (*char_ptr == c || *char_ptr == '\0')
-      return (void *) char_ptr;
+  /* Align the input address to op_t.  */
+  uintptr_t s_int = (uintptr_t) str;
+  word_ptr = (op_t*) (s_int & -sizeof (op_t));
 
-  /* All these elucidatory comments refer to 4-byte longwords,
-     but the theory applies equally well to 8-byte longwords.  */
+  /* Read the first aligned word, but force bytes before the string to
+     match neither zero nor goal (we make sure the high bit of each byte
+     is 1, and the low 7 bits are all the opposite of the goal byte).  */
+  op_t bmask = create_mask (s_int);
+  word = (*word_ptr | bmask) ^ (repeated_c & highbit_mask (bmask));
 
-  longword_ptr = (unsigned long int *) char_ptr;
-
-  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
-     the "holes."  Note that there is a hole just to the left of
-     each byte, with an extra at the end:
-
-     bits:  01111110 11111110 11111110 11111111
-     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
-
-     The 1-bits make sure that carries propagate to the next 0-bit.
-     The 0-bits provide holes for carries to fall into.  */
-  magic_bits = -1;
-  magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1;
-
-  /* Set up a longword, each of whose bytes is C.  */
-  charmask = c | (c << 8);
-  charmask |= charmask << 16;
-  if (sizeof (longword) > 4)
-    /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
-    charmask |= (charmask << 16) << 16;
-  if (sizeof (longword) > 8)
-    abort ();
-
-  /* Instead of the traditional loop which tests each character,
-     we will test a longword at a time.  The tricky part is testing
-     if *any of the four* bytes in the longword in question are zero.  */
-  for (;;)
+  while (1)
     {
-      /* We tentatively exit the loop if adding MAGIC_BITS to
-	 LONGWORD fails to change any of the hole bits of LONGWORD.
-
-	 1) Is this safe?  Will it catch all the zero bytes?
-	 Suppose there is a byte with all zeros.  Any carry bits
-	 propagating from its left will fall into the hole at its
-	 least significant bit and stop.  Since there will be no
-	 carry from its most significant bit, the LSB of the
-	 byte to the left will be unchanged, and the zero will be
-	 detected.
-
-	 2) Is this worthwhile?  Will it ignore everything except
-	 zero bytes?  Suppose every byte of LONGWORD has a bit set
-	 somewhere.  There will be a carry into bit 8.  If bit 8
-	 is set, this will carry into bit 16.  If bit 8 is clear,
-	 one of bits 9-15 must be set, so there will be a carry
-	 into bit 16.  Similarly, there will be a carry into bit
-	 24.  If one of bits 24-30 is set, there will be a carry
-	 into bit 31, so all of the hole bits will be changed.
-
-	 The one misfire occurs when bits 24-30 are clear and bit
-	 31 is set; in this case, the hole at bit 31 is not
-	 changed.  If we had access to the processor carry flag,
-	 we could close this loophole by putting the fourth hole
-	 at bit 32!
-
-	 So it ignores everything except 128's, when they're aligned
-	 properly.
-
-	 3) But wait!  Aren't we looking for C as well as zero?
-	 Good point.  So what we do is XOR LONGWORD with a longword,
-	 each of whose bytes is C.  This turns each byte that is C
-	 into a zero.  */
-
-      longword = *longword_ptr++;
-
-      /* Add MAGIC_BITS to LONGWORD.  */
-      if ((((longword + magic_bits)
-
-	    /* Set those bits that were unchanged by the addition.  */
-	    ^ ~longword)
-
-	   /* Look at only the hole bits.  If any of the hole bits
-	      are unchanged, most likely one of the bytes was a
-	      zero.  */
-	   & ~magic_bits) != 0 ||
-
-	  /* That caught zeroes.  Now test for C.  */
-	  ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask))
-	   & ~magic_bits) != 0)
-	{
-	  /* Which of the bytes was C or zero?
-	     If none of them were, it was a misfire; continue the search.  */
-
-	  const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
-
-	  if (*cp == c || *cp == '\0')
-	    return (char *) cp;
-	  if (*++cp == c || *cp == '\0')
-	    return (char *) cp;
-	  if (*++cp == c || *cp == '\0')
-	    return (char *) cp;
-	  if (*++cp == c || *cp == '\0')
-	    return (char *) cp;
-	  if (sizeof (longword) > 4)
-	    {
-	      if (*++cp == c || *cp == '\0')
-		return (char *) cp;
-	      if (*++cp == c || *cp == '\0')
-		return (char *) cp;
-	      if (*++cp == c || *cp == '\0')
-		return (char *) cp;
-	      if (*++cp == c || *cp == '\0')
-		return (char *) cp;
-	    }
-	}
+      if (has_zero_eq (word, repeated_c))
+        break;
+      word = *++word_ptr;
     }
 
-  /* This should never happen.  */
-  return NULL;
+  found = index_first_zero_eq (word, repeated_c);
+
+  return (char *) (word_ptr) + found;
 }
 
 weak_alias (__strchrnul, strchrnul)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7fab7d55da5cbcccc034188a4bec35b8a0522402

commit 7fab7d55da5cbcccc034188a4bec35b8a0522402
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:19:12 2017 -0200

    Improve generic memchr
    
    The new algorithm has the following key differences:
    
      - Reads the first word unaligned and uses the string-maskoff
        functions to remove unwanted data.  This strategy follows the
        assembly-optimized implementations for aarch64, powerpc and tile.
    
      - Use string-fz{b,i} and string-opthr functions.
    
    	[BZ #5806]
    	* string/memchr.c: Use string-fzb.h, string-fzi.h, string-opthr.h.

diff --git a/string/memchr.c b/string/memchr.c
index 523f9fe..dc37ad7 100644
--- a/string/memchr.c
+++ b/string/memchr.c
@@ -20,20 +20,16 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _LIBC
-# include <config.h>
-#endif
-
 #include <string.h>
-
 #include <stddef.h>
+#include <stdint.h>
+#include <string-fza.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-maskoff.h>
+#include <string-opthr.h>
 
-#include <limits.h>
-
-#undef __memchr
-#ifdef _LIBC
-# undef memchr
-#endif
+#undef memchr
 
 #ifndef MEMCHR
 # define MEMCHR memchr
@@ -43,113 +39,46 @@
 void *
 MEMCHR (void const *s, int c_in, size_t n)
 {
-  /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
-     long instead of a 64-bit uintmax_t tends to give better
-     performance.  On 64-bit hardware, unsigned long is generally 64
-     bits already.  Change this typedef to experiment with
-     performance.  */
-  typedef unsigned long int longword;
-
-  const unsigned char *char_ptr;
-  const longword *longword_ptr;
-  longword repeated_one;
-  longword repeated_c;
-  unsigned char c;
-
-  c = (unsigned char) c_in;
-
-  /* Handle the first few bytes by reading one byte at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary.  */
-  for (char_ptr = (const unsigned char *) s;
-       n > 0 && (size_t) char_ptr % sizeof (longword) != 0;
-       --n, ++char_ptr)
-    if (*char_ptr == c)
-      return (void *) char_ptr;
-
-  longword_ptr = (const longword *) char_ptr;
-
-  /* All these elucidatory comments refer to 4-byte longwords,
-     but the theory applies equally well to any size longwords.  */
-
-  /* Compute auxiliary longword values:
-     repeated_one is a value which has a 1 in every byte.
-     repeated_c has c in every byte.  */
-  repeated_one = 0x01010101;
-  repeated_c = c | (c << 8);
-  repeated_c |= repeated_c << 16;
-  if (0xffffffffU < (longword) -1)
-    {
-      repeated_one |= repeated_one << 31 << 1;
-      repeated_c |= repeated_c << 31 << 1;
-      if (8 < sizeof (longword))
-	{
-	  size_t i;
-
-	  for (i = 64; i < sizeof (longword) * 8; i *= 2)
-	    {
-	      repeated_one |= repeated_one << i;
-	      repeated_c |= repeated_c << i;
-	    }
-	}
-    }
+  const op_t *word_ptr, *lword;
+  op_t repeated_c, before_mask, word;
+  const char *lbyte;
+  char *ret;
+  uintptr_t s_int;
 
-  /* Instead of the traditional loop which tests each byte, we will test a
-     longword at a time.  The tricky part is testing if *any of the four*
-     bytes in the longword in question are equal to c.  We first use an xor
-     with repeated_c.  This reduces the task to testing whether *any of the
-     four* bytes in longword1 is zero.
-
-     We compute tmp =
-       ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
-     That is, we perform the following operations:
-       1. Subtract repeated_one.
-       2. & ~longword1.
-       3. & a mask consisting of 0x80 in every byte.
-     Consider what happens in each byte:
-       - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
-	 and step 3 transforms it into 0x80.  A carry can also be propagated
-	 to more significant bytes.
-       - If a byte of longword1 is nonzero, let its lowest 1 bit be at
-	 position k (0 <= k <= 7); so the lowest k bits are 0.  After step 1,
-	 the byte ends in a single bit of value 0 and k bits of value 1.
-	 After step 2, the result is just k bits of value 1: 2^k - 1.  After
-	 step 3, the result is 0.  And no carry is produced.
-     So, if longword1 has only non-zero bytes, tmp is zero.
-     Whereas if longword1 has a zero byte, call j the position of the least
-     significant zero byte.  Then the result has a zero at positions 0, ...,
-     j-1 and a 0x80 at position j.  We cannot predict the result at the more
-     significant bytes (positions j+1..3), but it does not matter since we
-     already have a non-zero bit at position 8*j+7.
-
-     So, the test whether any byte in longword1 is zero is equivalent to
-     testing whether tmp is nonzero.  */
-
-  while (n >= sizeof (longword))
-    {
-      longword longword1 = *longword_ptr ^ repeated_c;
 
-      if ((((longword1 - repeated_one) & ~longword1)
-	   & (repeated_one << 7)) != 0)
-	break;
-      longword_ptr++;
-      n -= sizeof (longword);
-    }
+  if (__glibc_unlikely (n == 0))
+    return NULL;
+
+  s_int = (uintptr_t) s;
+  word_ptr = (const op_t*) (s_int & -sizeof (op_t));
+
+  /* Set up a word, each of whose bytes is C.  */
+  repeated_c = repeat_bytes (c_in);
+  before_mask = create_mask (s_int);
+
+  /* Compute the address of the last byte, taking into consideration
+     possible overflow.  */
+  uintptr_t lbyte_int = s_int + n - 1;
+  lbyte_int |= -(lbyte_int < s_int);
+  lbyte = (const char *) lbyte_int;
 
-  char_ptr = (const unsigned char *) longword_ptr;
+  /* Compute the address of the word containing the last byte. */
+  lword = (const op_t *) ((uintptr_t) lbyte & -sizeof (op_t));
 
-  /* At this point, we know that either n < sizeof (longword), or one of the
-     sizeof (longword) bytes starting at char_ptr is == c.  On little-endian
-     machines, we could determine the first such byte without any further
-     memory accesses, just by looking at the tmp result from the last loop
-     iteration.  But this does not work on big-endian machines.  Choose code
-     that works in both cases.  */
+  /* Read the first word, but munge it so that bytes before the array
+     will not match goal.  */
+  word = (*word_ptr | before_mask) ^ (repeated_c & before_mask);
 
-  for (; n > 0; --n, ++char_ptr)
+  while (has_eq (word, repeated_c) == 0)
     {
-      if (*char_ptr == c)
-	return (void *) char_ptr;
+      if (word_ptr == lword)
+	return NULL;
+      word = *++word_ptr;
     }
 
-  return NULL;
+  /* We found a match, but it might be in a byte past the end
+     of the array.  */
+  ret = (char *) word_ptr + index_first_eq (word, repeated_c);
+  return (ret <= lbyte) ? ret : NULL;
 }
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/alpha/memchr.c b/sysdeps/alpha/memchr.c
deleted file mode 100644
index 11b0c73..0000000
--- a/sysdeps/alpha/memchr.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/* Copyright (C) 2010-2018 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <string.h>
-
-typedef unsigned long word;
-
-static inline word
-ldq_u(const void *s)
-{
-  return *(const word *)((word)s & -8);
-}
-
-#define unlikely(X)	__builtin_expect ((X), 0)
-#define prefetch(X)	__builtin_prefetch ((void *)(X), 0)
-
-#define cmpbeq0(X)	__builtin_alpha_cmpbge(0, (X))
-#define find(X, Y)	cmpbeq0 ((X) ^ (Y))
-
-/* Search no more than N bytes of S for C.  */
-
-void *
-__memchr (const void *s, int xc, size_t n)
-{
-  const word *s_align;
-  word t, current, found, mask, offset;
-
-  if (unlikely (n == 0))
-    return 0;
-
-  current = ldq_u (s);
-
-  /* Replicate low byte of XC into all bytes of C.  */
-  t = xc & 0xff;			/* 0000000c */
-  t = (t << 8) | t;			/* 000000cc */
-  t = (t << 16) | t;			/* 0000cccc */
-  const word c = (t << 32) | t;		/* cccccccc */
-
-  /* Align the source, and decrement the count by the number
-     of bytes searched in the first word.  */
-  s_align = (const word *)((word)s & -8);
-  {
-    size_t inc = n + ((word)s & 7);
-    n = inc | -(inc < n);
-  }
-
-  /* Deal with misalignment in the first word for the comparison.  */
-  mask = (1ul << ((word)s & 7)) - 1;
-
-  /* If the entire string fits within one word, we may need masking
-     at both the front and the back of the string.  */
-  if (unlikely (n <= 8))
-    {
-      mask |= -1ul << n;
-      goto last_quad;
-    }
-
-  found = find (current, c) & ~mask;
-  if (unlikely (found))
-    goto found_it;
-
-  s_align++;
-  n -= 8;
-
-  /* If the block is sufficiently large, align to cacheline and prefetch.  */
-  if (unlikely (n >= 256))
-    {
-      /* Prefetch 3 cache lines beyond the one we're working on.  */
-      prefetch (s_align + 8);
-      prefetch (s_align + 16);
-      prefetch (s_align + 24);
-
-      while ((word)s_align & 63)
-	{
-	  current = *s_align;
-	  found = find (current, c);
-	  if (found)
-	    goto found_it;
-	  s_align++;
-	  n -= 8;
-	}
-
-	/* Within each cacheline, advance the load for the next word
-	   before the test for the previous word is complete.  This
-	   allows us to hide the 3 cycle L1 cache load latency.  We
-	   only perform this advance load within a cacheline to prevent
-	   reading across page boundary.  */
-#define CACHELINE_LOOP				\
-	do {					\
-	  word i, next = s_align[0];		\
-	  for (i = 0; i < 7; ++i)		\
-	    {					\
-	      current = next;			\
-	      next = s_align[1];		\
-	      found = find (current, c);	\
-	      if (unlikely (found))		\
-		goto found_it;			\
-	      s_align++;			\
-	    }					\
-	  current = next;			\
-	  found = find (current, c);		\
-	  if (unlikely (found))			\
-	    goto found_it;			\
-	  s_align++;				\
-	  n -= 64;				\
-	} while (0)
-
-      /* While there's still lots more data to potentially be read,
-	 continue issuing prefetches for the 4th cacheline out.  */
-      while (n >= 256)
-	{
-	  prefetch (s_align + 24);
-	  CACHELINE_LOOP;
-	}
-
-      /* Up to 3 cache lines remaining.  Continue issuing advanced
-	 loads, but stop prefetching.  */
-      while (n >= 64)
-	CACHELINE_LOOP;
-
-      /* We may have exhausted the buffer.  */
-      if (n == 0)
-	return NULL;
-    }
-
-  /* Quadword aligned loop.  */
-  current = *s_align;
-  while (n > 8)
-    {
-      found = find (current, c);
-      if (unlikely (found))
-	goto found_it;
-      current = *++s_align;
-      n -= 8;
-    }
-
-  /* The last word may need masking at the tail of the compare.  */
-  mask = -1ul << n;
- last_quad:
-  found = find (current, c) & ~mask;
-  if (found == 0)
-    return NULL;
-
- found_it:
-#ifdef __alpha_cix__
-  offset = __builtin_alpha_cttz (found);
-#else
-  /* Extract LSB.  */
-  found &= -found;
-
-  /* Binary search for the LSB.  */
-  offset  = (found & 0x0f ? 0 : 4);
-  offset += (found & 0x33 ? 0 : 2);
-  offset += (found & 0x55 ? 0 : 1);
-#endif
-
-  return (void *)((word)s_align + offset);
-}
-
-#ifdef weak_alias
-weak_alias (__memchr, memchr)
-#endif
-libc_hidden_builtin_def (memchr)

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=c1ed1c8d1b25ec9cc0d788070682e5db2827c147

commit c1ed1c8d1b25ec9cc0d788070682e5db2827c147
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:18:48 2017 -0200

    Improve generic strchr
    
    The new algorithm has the following key differences:
    
      - Reads the whole aligned word containing the (possibly unaligned)
        start and uses the string-maskoff functions to remove unwanted
        data.  This follows the assembly-optimized aarch64 and powerpc code.
    
      - Uses the string-fz{b,i} and string-extbyte functions.
    
    	[BZ #5806]
    	* string/strchr.c: Use string-fzb.h, string-fzi.h, string-extbyte.h.

diff --git a/string/strchr.c b/string/strchr.c
index a63fdfc..ee8ed5c 100644
--- a/string/strchr.c
+++ b/string/strchr.c
@@ -22,8 +22,15 @@
 
 #include <string.h>
 #include <stdlib.h>
+#include <stdint.h>
+#include <string-fza.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-extbyte.h>
+#include <string-maskoff.h>
 
 #undef strchr
+#undef index
 
 #ifndef STRCHR
 # define STRCHR strchr
@@ -33,153 +40,36 @@
 char *
 STRCHR (const char *s, int c_in)
 {
-  const unsigned char *char_ptr;
-  const unsigned long int *longword_ptr;
-  unsigned long int longword, magic_bits, charmask;
-  unsigned char c;
+  const op_t *word_ptr;
+  op_t found, word;
 
-  c = (unsigned char) c_in;
+  /* Set up a word, each of whose bytes is C.  */
+  unsigned char c = (unsigned char) c_in;
+  op_t repeated_c = repeat_bytes (c_in);
 
-  /* Handle the first few characters by reading one character at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary.  */
-  for (char_ptr = (const unsigned char *) s;
-       ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
-       ++char_ptr)
-    if (*char_ptr == c)
-      return (void *) char_ptr;
-    else if (*char_ptr == '\0')
-      return NULL;
+  /* Align the input address to op_t.  */
+  uintptr_t s_int = (uintptr_t) s;
+  word_ptr = (op_t*) (s_int & -sizeof (op_t));
 
-  /* All these elucidatory comments refer to 4-byte longwords,
-     but the theory applies equally well to 8-byte longwords.  */
+  /* Read the first aligned word, but force bytes before the string to
+     match neither zero nor goal (we make sure the high bit of each byte
+     is 1, and the low 7 bits are all the opposite of the goal byte).  */
+  op_t bmask = create_mask (s_int);
+  word = (*word_ptr | bmask) ^ (repeated_c & highbit_mask (bmask));
 
-  longword_ptr = (unsigned long int *) char_ptr;
-
-  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
-     the "holes."  Note that there is a hole just to the left of
-     each byte, with an extra at the end:
-
-     bits:  01111110 11111110 11111110 11111111
-     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
-
-     The 1-bits make sure that carries propagate to the next 0-bit.
-     The 0-bits provide holes for carries to fall into.  */
-  magic_bits = -1;
-  magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1;
-
-  /* Set up a longword, each of whose bytes is C.  */
-  charmask = c | (c << 8);
-  charmask |= charmask << 16;
-  if (sizeof (longword) > 4)
-    /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
-    charmask |= (charmask << 16) << 16;
-  if (sizeof (longword) > 8)
-    abort ();
-
-  /* Instead of the traditional loop which tests each character,
-     we will test a longword at a time.  The tricky part is testing
-     if *any of the four* bytes in the longword in question are zero.  */
-  for (;;)
+  while (1)
     {
-      /* We tentatively exit the loop if adding MAGIC_BITS to
-	 LONGWORD fails to change any of the hole bits of LONGWORD.
-
-	 1) Is this safe?  Will it catch all the zero bytes?
-	 Suppose there is a byte with all zeros.  Any carry bits
-	 propagating from its left will fall into the hole at its
-	 least significant bit and stop.  Since there will be no
-	 carry from its most significant bit, the LSB of the
-	 byte to the left will be unchanged, and the zero will be
-	 detected.
-
-	 2) Is this worthwhile?  Will it ignore everything except
-	 zero bytes?  Suppose every byte of LONGWORD has a bit set
-	 somewhere.  There will be a carry into bit 8.  If bit 8
-	 is set, this will carry into bit 16.  If bit 8 is clear,
-	 one of bits 9-15 must be set, so there will be a carry
-	 into bit 16.  Similarly, there will be a carry into bit
-	 24.  If one of bits 24-30 is set, there will be a carry
-	 into bit 31, so all of the hole bits will be changed.
-
-	 The one misfire occurs when bits 24-30 are clear and bit
-	 31 is set; in this case, the hole at bit 31 is not
-	 changed.  If we had access to the processor carry flag,
-	 we could close this loophole by putting the fourth hole
-	 at bit 32!
-
-	 So it ignores everything except 128's, when they're aligned
-	 properly.
-
-	 3) But wait!  Aren't we looking for C as well as zero?
-	 Good point.  So what we do is XOR LONGWORD with a longword,
-	 each of whose bytes is C.  This turns each byte that is C
-	 into a zero.  */
-
-      longword = *longword_ptr++;
-
-      /* Add MAGIC_BITS to LONGWORD.  */
-      if ((((longword + magic_bits)
-
-	    /* Set those bits that were unchanged by the addition.  */
-	    ^ ~longword)
-
-	   /* Look at only the hole bits.  If any of the hole bits
-	      are unchanged, most likely one of the bytes was a
-	      zero.  */
-	   & ~magic_bits) != 0 ||
-
-	  /* That caught zeroes.  Now test for C.  */
-	  ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask))
-	   & ~magic_bits) != 0)
-	{
-	  /* Which of the bytes was C or zero?
-	     If none of them were, it was a misfire; continue the search.  */
-
-	  const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
-
-	  if (*cp == c)
-	    return (char *) cp;
-	  else if (*cp == '\0')
-	    return NULL;
-	  if (*++cp == c)
-	    return (char *) cp;
-	  else if (*cp == '\0')
-	    return NULL;
-	  if (*++cp == c)
-	    return (char *) cp;
-	  else if (*cp == '\0')
-	    return NULL;
-	  if (*++cp == c)
-	    return (char *) cp;
-	  else if (*cp == '\0')
-	    return NULL;
-	  if (sizeof (longword) > 4)
-	    {
-	      if (*++cp == c)
-		return (char *) cp;
-	      else if (*cp == '\0')
-		return NULL;
-	      if (*++cp == c)
-		return (char *) cp;
-	      else if (*cp == '\0')
-		return NULL;
-	      if (*++cp == c)
-		return (char *) cp;
-	      else if (*cp == '\0')
-		return NULL;
-	      if (*++cp == c)
-		return (char *) cp;
-	      else if (*cp == '\0')
-		return NULL;
-	    }
-	}
+      if (has_zero_eq (word, repeated_c))
+        break;
+      word = *++word_ptr;
     }
 
+  found = index_first_zero_eq (word, repeated_c);
+
+  if (extractbyte (word, found) == c)
+    return (char *) (word_ptr) + found;
   return NULL;
 }
 
-#ifdef weak_alias
-# undef index
 weak_alias (strchr, index)
-#endif
 libc_hidden_builtin_def (strchr)
diff --git a/sysdeps/s390/multiarch/strchr-c.c b/sysdeps/s390/multiarch/strchr-c.c
index 606cb56..e91ef94 100644
--- a/sysdeps/s390/multiarch/strchr-c.c
+++ b/sysdeps/s390/multiarch/strchr-c.c
@@ -19,6 +19,7 @@
 #if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
 # define STRCHR  __strchr_c
 # undef weak_alias
+# define weak_alias(a, b)
 # ifdef SHARED
 #  undef libc_hidden_builtin_def
 #  define libc_hidden_builtin_def(name)				\

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7f989a408cd9197d2b69fbf81a1408200d7efc40

commit 7f989a408cd9197d2b69fbf81a1408200d7efc40
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:18:24 2017 -0200

    Improve generic strlen
    
    The new algorithm has the following key differences:
    
      - Reads the whole aligned word containing the (possibly unaligned)
        start and uses the string-maskoff functions to remove unwanted
        data.  This follows the assembly-optimized powerpc, sparc and SH code.
    
      - Extracts the has_zero and index_first_zero tests into headers that
        can be tailored for the architecture.
    
    	[BZ #5806]
        	* sysdeps/generic/string-fza.h: New file.
        	* sysdeps/generic/string-fzb.h: New file.
        	* sysdeps/generic/string-fzi.h: New file.
        	* sysdeps/generic/string-extbyte.h: New file.
        	* string/strlen.c: Use them.

diff --git a/string/strlen.c b/string/strlen.c
index 8ce1318..6bd0ed9 100644
--- a/string/strlen.c
+++ b/string/strlen.c
@@ -20,6 +20,11 @@
 
 #include <string.h>
 #include <stdlib.h>
+#include <stdint.h>
+#include <string-fza.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-maskoff.h>
 
 #undef strlen
 
@@ -32,78 +37,20 @@
 size_t
 STRLEN (const char *str)
 {
-  const char *char_ptr;
-  const unsigned long int *longword_ptr;
-  unsigned long int longword, himagic, lomagic;
+  /* Align pointer to sizeof op_t.  */
+  const uintptr_t s_int = (uintptr_t) str;
+  const op_t *word_ptr = (const op_t*) (s_int & -sizeof (op_t));
 
-  /* Handle the first few characters by reading one character at a time.
-     Do this until CHAR_PTR is aligned on a longword boundary.  */
-  for (char_ptr = str; ((unsigned long int) char_ptr
-			& (sizeof (longword) - 1)) != 0;
-       ++char_ptr)
-    if (*char_ptr == '\0')
-      return char_ptr - str;
+  /* Read and MASK the first word. */
+  op_t word = *word_ptr | create_mask (s_int);
 
-  /* All these elucidatory comments refer to 4-byte longwords,
-     but the theory applies equally well to 8-byte longwords.  */
-
-  longword_ptr = (unsigned long int *) char_ptr;
-
-  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
-     the "holes."  Note that there is a hole just to the left of
-     each byte, with an extra at the end:
-
-     bits:  01111110 11111110 11111110 11111111
-     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
-
-     The 1-bits make sure that carries propagate to the next 0-bit.
-     The 0-bits provide holes for carries to fall into.  */
-  himagic = 0x80808080L;
-  lomagic = 0x01010101L;
-  if (sizeof (longword) > 4)
+  while (1)
     {
-      /* 64-bit version of the magic.  */
-      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
-      himagic = ((himagic << 16) << 16) | himagic;
-      lomagic = ((lomagic << 16) << 16) | lomagic;
+      if (has_zero (word))
+	break;
+      word = *++word_ptr;
     }
-  if (sizeof (longword) > 8)
-    abort ();
 
-  /* Instead of the traditional loop which tests each character,
-     we will test a longword at a time.  The tricky part is testing
-     if *any of the four* bytes in the longword in question are zero.  */
-  for (;;)
-    {
-      longword = *longword_ptr++;
-
-      if (((longword - lomagic) & ~longword & himagic) != 0)
-	{
-	  /* Which of the bytes was the zero?  If none of them were, it was
-	     a misfire; continue the search.  */
-
-	  const char *cp = (const char *) (longword_ptr - 1);
-
-	  if (cp[0] == 0)
-	    return cp - str;
-	  if (cp[1] == 0)
-	    return cp - str + 1;
-	  if (cp[2] == 0)
-	    return cp - str + 2;
-	  if (cp[3] == 0)
-	    return cp - str + 3;
-	  if (sizeof (longword) > 4)
-	    {
-	      if (cp[4] == 0)
-		return cp - str + 4;
-	      if (cp[5] == 0)
-		return cp - str + 5;
-	      if (cp[6] == 0)
-		return cp - str + 6;
-	      if (cp[7] == 0)
-		return cp - str + 7;
-	    }
-	}
-    }
+  return ((const char *) word_ptr) + index_first_zero (word) - str;
 }
 libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/generic/string-extbyte.h b/sysdeps/generic/string-extbyte.h
new file mode 100644
index 0000000..1ccd5b3
--- /dev/null
+++ b/sysdeps/generic/string-extbyte.h
@@ -0,0 +1,35 @@
+/* string-extbyte.h -- function memory order byte extract.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_EXTBYTE_H
+#define STRING_EXTBYTE_H 1
+
+#include <limits.h>
+#include <endian.h>
+#include <string-optype.h>
+
+static inline unsigned char
+extractbyte (op_t x, unsigned idx)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    return x >> (idx * CHAR_BIT);
+  else
+    return x >> (sizeof (x) - 1 - idx) * CHAR_BIT;
+}
+
+#endif /* STRING_EXTBYTE_H */
diff --git a/sysdeps/generic/string-fza.h b/sysdeps/generic/string-fza.h
new file mode 100644
index 0000000..638df2e
--- /dev/null
+++ b/sysdeps/generic/string-fza.h
@@ -0,0 +1,117 @@
+/* string-fza.h -- zero byte detection; basics.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZA_H
+#define STRING_FZA_H 1
+
+#include <limits.h>
+#include <string-optype.h>
+
+/* This function returns non-zero if any byte in X is zero.
+   More specifically, at least one bit set within the least significant
+   byte that was zero; other bytes within the word are indeterminate.  */
+
+static inline op_t
+find_zero_low (op_t x)
+{
+  /* This expression comes from
+       https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+     Subtracting 1 sets 0x80 in a byte that was 0; anding ~x clears
+     0x80 in a byte that was >= 128; anding 0x80 isolates that test bit.  */
+  op_t lsb = (op_t)-1 / 0xff;
+  op_t msb = lsb << (CHAR_BIT - 1);
+  return (x - lsb) & ~x & msb;
+}
+
+/* This function returns at least one bit set within every byte of X that
+   is zero.  The result is exact in that, unlike find_zero_low, all bytes
+   are determinate.  This is usually used for finding the index of the
+   most significant byte that was zero.  */
+
+static inline op_t
+find_zero_all (op_t x)
+{
+  /* For each byte, find not-zero by
+     (0) And 0x7f so that we cannot carry between bytes,
+     (1) Add 0x7f so that non-zero carries into 0x80,
+     (2) Or in the original byte (which might have had 0x80 set).
+     Then invert and mask such that 0x80 is set iff that byte was zero.  */
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;
+  return ~(((x & m) + m) | x | m);
+}
+
+/* With similar caveats, identify bytes that are equal between X1 and X2.  */
+
+static inline op_t
+find_eq_low (op_t x1, op_t x2)
+{
+  return find_zero_low (x1 ^ x2);
+}
+
+static inline op_t
+find_eq_all (op_t x1, op_t x2)
+{
+  return find_zero_all (x1 ^ x2);
+}
+
+/* With similar caveats, identify zero bytes in X1 and bytes that are
+   equal between X1 and X2.  */
+
+static inline op_t
+find_zero_eq_low (op_t x1, op_t x2)
+{
+  op_t lsb = (op_t)-1 / 0xff;
+  op_t msb = lsb << (CHAR_BIT - 1);
+  op_t eq = x1 ^ x2;
+  return (((x1 - lsb) & ~x1) | ((eq - lsb) & ~eq)) & msb;
+}
+
+static inline op_t
+find_zero_eq_all (op_t x1, op_t x2)
+{
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;
+  op_t eq = x1 ^ x2;
+  op_t c1 = ((x1 & m) + m) | x1;
+  op_t c2 = ((eq & m) + m) | eq;
+  return ~((c1 & c2) | m);
+}
+
+/* Identify zero bytes in X1 and bytes that differ between X1 and X2.  Bytes
+   are masked to 7 bits before the add so carries cannot hide a zero.  */
+
+static inline op_t
+find_zero_ne_low (op_t x1, op_t x2)
+{
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;
+  op_t eq = x1 ^ x2;
+  op_t nz1 = ((x1 & m) + m) | x1;	/* msb set if byte not zero */
+  op_t ne2 = ((eq & m) + m) | eq;	/* msb set if byte not equal */
+  return (ne2 | ~nz1) & ~m;	/* msb set if x1 zero or x2 not equal */
+}
+
+static inline op_t
+find_zero_ne_all (op_t x1, op_t x2)
+{
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;
+  op_t eq = x1 ^ x2;
+  op_t nz1 = ((x1 & m) + m) | x1;
+  op_t ne2 = ((eq & m) + m) | eq;
+  return (ne2 | ~nz1) & ~m;
+}
+
+#endif /* STRING_FZA_H */
diff --git a/sysdeps/generic/string-fzb.h b/sysdeps/generic/string-fzb.h
new file mode 100644
index 0000000..e0fc26f
--- /dev/null
+++ b/sysdeps/generic/string-fzb.h
@@ -0,0 +1,49 @@
+/* string-fzb.h -- zero byte detection, boolean.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZB_H
+#define STRING_FZB_H 1
+
+#include <endian.h>
+#include <string-fza.h>
+
+/* Determine if any byte within X is zero.  This is a pure boolean test.  */
+
+static inline _Bool
+has_zero (op_t x)
+{
+  return find_zero_low (x) != 0;
+}
+
+/* Likewise, but for byte equality between X1 and X2.  */
+
+static inline _Bool
+has_eq (op_t x1, op_t x2)
+{
+  return find_eq_low (x1, x2) != 0;
+}
+
+/* Likewise, but for zeros in X1 and equal bytes between X1 and X2.  */
+
+static inline _Bool
+has_zero_eq (op_t x1, op_t x2)
+{
+  return find_zero_eq_low (x1, x2);
+}
+
+#endif /* STRING_FZB_H */
diff --git a/sysdeps/generic/string-fzi.h b/sysdeps/generic/string-fzi.h
new file mode 100644
index 0000000..da24b8e
--- /dev/null
+++ b/sysdeps/generic/string-fzi.h
@@ -0,0 +1,152 @@
+/* string-fzi.h -- zero byte detection; indexes.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZI_H
+#define STRING_FZI_H 1
+
+#include <limits.h>
+#include <endian.h>
+#include <string-fza.h>
+
+/* A subroutine for the index_first_* functions.  Given a test word C,
+   return the index, in memory order, of the first byte of C that is
+   non-zero.  */
+
+static inline unsigned int
+index_first_ (op_t c)
+{
+  _Static_assert (sizeof (op_t) == sizeof (long)
+		  || sizeof (op_t) == sizeof (long long),
+		  "Unhandled word size");
+
+  unsigned r;
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_ctzl (c);
+      else
+	r = __builtin_ctzll (c);
+    }
+  else
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_clzl (c);
+      else
+	r = __builtin_clzll (c);
+    }
+  return r / CHAR_BIT;
+}
+
+/* Similarly, but return the (memory order) index of the last byte
+   that is non-zero.  */
+
+static inline unsigned int
+index_last_ (op_t c)
+{
+  _Static_assert (sizeof (op_t) == sizeof (long)
+		  || sizeof (op_t) == sizeof (long long),
+		  "Unhandled word size");
+
+  unsigned r;
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_clzl (c);
+      else
+	r = __builtin_clzll (c);
+    }
+  else
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_ctzl (c);
+      else
+	r = __builtin_ctzll (c);
+    }
+  return sizeof (op_t) - 1 - (r / CHAR_BIT);
+}
+
+/* Given a word X that is known to contain a zero byte, return the
+   index of the first such within the word in memory order.  */
+
+static inline unsigned int
+index_first_zero (op_t x)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x = find_zero_low (x);
+  else
+    x = find_zero_all (x);
+  return index_first_ (x);
+}
+
+/* Similarly, but perform the search for byte equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_eq (op_t x1, op_t x2)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x1 = find_eq_low (x1, x2);
+  else
+    x1 = find_eq_all (x1, x2);
+  return index_first_ (x1);
+}
+
+/* Similarly, but perform the search for zero within X1 or
+   equality between X1 and X2.  */
+
+static inline unsigned int
+index_first_zero_eq (op_t x1, op_t x2)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x1 = find_zero_eq_low (x1, x2);
+  else
+    x1 = find_zero_eq_all (x1, x2);
+  return index_first_ (x1);
+}
+
+/* Similarly, but perform the search for zero within X1 or
+   inequality between X1 and X2.  */
+
+static inline unsigned int
+index_first_zero_ne (op_t x1, op_t x2)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x1 = find_zero_ne_low (x1, x2);
+  else
+    x1 = find_zero_ne_all (x1, x2);
+  return index_first_ (x1);
+}
+
+/* Similarly, but search for the last zero within X.  */
+
+static inline unsigned int
+index_last_zero (op_t x)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x = find_zero_all (x);
+  else
+    x = find_zero_low (x);
+  return index_last_ (x);
+}
+
+static inline unsigned int
+index_last_eq (op_t x1, op_t x2)
+{
+  return index_last_zero (x1 ^ x2);
+}
+
+#endif /* STRING_FZI_H */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d16d6a7e1fe19a5f252f721a3229c0daf8979a31

commit d16d6a7e1fe19a5f252f721a3229c0daf8979a31
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date:   Thu Feb 23 18:45:54 2017 -0300

    Add string-maskoff.h generic header
    
    Helper functions for handling unaligned accesses in string operations,
    such as creating a bit mask to remove unwanted bytes from an aligned
    read of an unaligned pointer, and repeating a byte within a word.
    
    	* sysdeps/generic/string-maskoff.h: New file.

diff --git a/sysdeps/generic/string-maskoff.h b/sysdeps/generic/string-maskoff.h
new file mode 100644
index 0000000..3038d3e
--- /dev/null
+++ b/sysdeps/generic/string-maskoff.h
@@ -0,0 +1,65 @@
+/* string-maskoff.h -- mask off bits.  Generic C version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_MASKOFF_H
+#define STRING_MASKOFF_H 1
+
+#include <endian.h>
+#include <limits.h>
+#include <stdint.h>
+#include <string-optype.h>
+
+/* Provide a mask, based on the pointer alignment I, with all bits set in
+   the bytes of the aligned word that come before the pointed-to byte.
+   It is used to mask off undesirable bits from an aligned read from an
+   unaligned pointer.  Only I modulo sizeof (op_t) is used, so an aligned
+   I yields 0.  For instance, on a 64-bit machine with a pointer
+   alignment of 3 the function returns 0x0000000000ffffff for LE and
+   0xffffff0000000000 for BE (meaning to mask off the initial 3 bytes).  */
+static inline op_t
+create_mask (uintptr_t i)
+{
+  i = i % sizeof (op_t);
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    return ~(((op_t)-1) << (i * CHAR_BIT));
+  else
+    return ~(((op_t)-1) >> (i * CHAR_BIT));
+}
+
+/* Set up a word with each byte being C_IN.  For instance, on
+   a 64-bit machine with input 0xce the function returns
+   0xcececececececece.  */
+static inline op_t
+repeat_bytes (unsigned char c_in)
+{
+  /* (op_t)-1 / 0xff is the word with 0x01 in every byte; multiplying it
+     by C_IN replicates C_IN into each byte.  */
+  return ((op_t)-1 / 0xff) * c_in;
+}
+
+/* Return the mask M with the high bit of each byte cleared and the low
+   7 bits of each byte left unchanged.  It is used to mask off
+   undesirable bits from an aligned read from an unaligned pointer,
+   while taking care not to produce a match in bytes that are not meant
+   to be matched.  For instance, on a 64-bit machine with a pointer
+   alignment of 3 the function returns 0x7f7f7f0000000000 (input meant
+   to be 0xffffff0000000000) for BE and 0x00000000007f7f7f for LE (input
+   meant to be 0x0000000000ffffff).  */
+static inline op_t
+highbit_mask (op_t m)
+{
+  return m & ~repeat_bytes (0x80);
+}
+
+#endif /* STRING_MASKOFF_H  */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ad1d81b8ba31514579912a7ff52c5405c21b9726

commit ad1d81b8ba31514579912a7ff52c5405c21b9726
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:15:27 2017 -0200

    Parameterize OP_T_THRES from memcopy.h
    
    Basically it moves OP_T_THRES out of memcopy.h to its own header
    and adjusts each architecture that redefines it.
    
    	* sysdeps/generic/memcopy.h (OP_T_THRES): Move...
    	* sysdeps/generic/string-opthr.h: ... here; new file.
    	* sysdeps/i386/memcopy.h (OP_T_THRES): Move...
    	* sysdeps/i386/string-opthr.h: ... here; new file.
    	* sysdeps/m68k/memcopy.h (OP_T_THRES): Remove.
    	* sysdeps/powerpc/powerpc32/power4/memcopy.h (OP_T_THRES): Remove.

diff --git a/sysdeps/generic/memcopy.h b/sysdeps/generic/memcopy.h
index c7e9cc9..1698379 100644
--- a/sysdeps/generic/memcopy.h
+++ b/sysdeps/generic/memcopy.h
@@ -58,6 +58,7 @@
 
 /* Type to use for aligned memory operations.  */
 #include <string-optype.h>
+#include <string-opthr.h>
 
 #define OPSIZ	(sizeof(op_t))
 
@@ -190,9 +191,6 @@ extern void _wordcopy_bwd_dest_aligned (long int, long int, size_t)
 
 #endif
 
-/* Threshold value for when to enter the unrolled loops.  */
-#define	OP_T_THRES	16
-
 /* Set to 1 if memcpy is safe to use for forward-copying memmove with
    overlapping addresses.  This is 0 by default because memcpy implementations
    are generally not safe for overlapping addresses.  */
diff --git a/sysdeps/generic/string-opthr.h b/sysdeps/generic/string-opthr.h
new file mode 100644
index 0000000..bf2f407
--- /dev/null
+++ b/sysdeps/generic/string-opthr.h
@@ -0,0 +1,25 @@
+/* string-opthr.h -- Define a threshold for word access.  Generic version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_OPTHR_H
+#define STRING_OPTHR_H 1
+
+/* Threshold value for when to enter the unrolled loops.  */
+#define OP_T_THRES	16
+
+#endif /* string-opthr.h */
diff --git a/sysdeps/generic/string-optype.h b/sysdeps/generic/string-optype.h
new file mode 100644
index 0000000..63e5b09
--- /dev/null
+++ b/sysdeps/generic/string-optype.h
@@ -0,0 +1,31 @@
+/* string-optype.h -- Define a type to use for word access.  Generic version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_OPTYPE_H
+#define STRING_OPTYPE_H 1
+
+/* Use the existing parameterization from gmp as a default.  */
+#include <gmp-mparam.h>
+
+/* op_t is the type used for word access: unsigned long long when
+   _LONG_LONG_LIMB is defined (presumably by gmp-mparam.h -- confirm),
+   unsigned long otherwise.  */
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int  op_t;
+#else
+typedef unsigned long int       op_t;
+#endif
+
+#endif /* string-optype.h */
diff --git a/sysdeps/i386/memcopy.h b/sysdeps/i386/memcopy.h
index 12bb39f..28cee47 100644
--- a/sysdeps/i386/memcopy.h
+++ b/sysdeps/i386/memcopy.h
@@ -19,9 +19,6 @@
 
 #include <sysdeps/generic/memcopy.h>
 
-#undef	OP_T_THRES
-#define	OP_T_THRES	8
-
 #undef	BYTE_COPY_FWD
 #define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				      \
   do {									      \
diff --git a/sysdeps/i386/string-opthr.h b/sysdeps/i386/string-opthr.h
new file mode 100644
index 0000000..c1be3d2
--- /dev/null
+++ b/sysdeps/i386/string-opthr.h
@@ -0,0 +1,25 @@
+/* string-opthr.h -- Define a threshold for word access.  i386 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef I386_STRING_OPTHR_H
+#define I386_STRING_OPTHR_H 1
+
+/* Threshold value for when to enter the unrolled loops.  */
+#define OP_T_THRES	8
+
+#endif /* I386_STRING_OPTHR_H */
diff --git a/sysdeps/m68k/memcopy.h b/sysdeps/m68k/memcopy.h
index 58569c6..ee0c5fc 100644
--- a/sysdeps/m68k/memcopy.h
+++ b/sysdeps/m68k/memcopy.h
@@ -21,9 +21,6 @@
 
 #if	defined(__mc68020__) || defined(mc68020)
 
-#undef	OP_T_THRES
-#define	OP_T_THRES	16
-
 /* WORD_COPY_FWD and WORD_COPY_BWD are not symmetric on the 68020,
    because of its weird instruction overlap characteristics.  */
 
diff --git a/sysdeps/powerpc/powerpc32/power4/memcopy.h b/sysdeps/powerpc/powerpc32/power4/memcopy.h
index 8050abc..37ed40b 100644
--- a/sysdeps/powerpc/powerpc32/power4/memcopy.h
+++ b/sysdeps/powerpc/powerpc32/power4/memcopy.h
@@ -51,11 +51,6 @@
      [I fail to understand.  I feel stupid.  --roland]
 */
 
-
-/* Threshold value for when to enter the unrolled loops.  */
-#undef	OP_T_THRES
-#define OP_T_THRES 16
-
 /* Copy exactly NBYTES bytes from SRC_BP to DST_BP,
    without any assumptions about alignment of the pointers.  */
 #undef BYTE_COPY_FWD

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=6b9ecce2f78a2bebc2c1c21c0b21e32ccdad8862

commit 6b9ecce2f78a2bebc2c1c21c0b21e32ccdad8862
Author: Richard Henderson <rth@twiddle.net>
Date:   Thu Feb 16 16:14:09 2017 -0200

    Parameterize op_t from memcopy.h
    
    Basically moves the op_t definition out to a specific header.
    
    	* sysdeps/generic/string-optype.h: New file.
    	* sysdeps/generic/memcopy.h: Include it.

diff --git a/sysdeps/generic/memcopy.h b/sysdeps/generic/memcopy.h
index c0d8da3..c7e9cc9 100644
--- a/sysdeps/generic/memcopy.h
+++ b/sysdeps/generic/memcopy.h
@@ -56,10 +56,9 @@
      [I fail to understand.  I feel stupid.  --roland]
 */
 
-/* Type to use for aligned memory operations.
-   This should normally be the biggest type supported by a single load
-   and store.  */
-#define	op_t	unsigned long int
+/* Type to use for aligned memory operations.  */
+#include <string-optype.h>
+
 #define OPSIZ	(sizeof(op_t))
 
 /* Type to use for unaligned operations.  */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=85893852d17ce5742f128308e55ead1f10e2beb0

commit 85893852d17ce5742f128308e55ead1f10e2beb0
Author: Adhemerval Zanella <adhemerval.zanella@linaro.com>
Date:   Thu Mar 9 15:00:27 2017 +0100

    string: Remove __memchr definition
    
    Since memchr is a C90 function (so there are no external-linkage
    namespace issues) and is not used in any macros defined in installed
    headers (so there are no block-scope namespace issues), it is safe to
    remove the internal __memchr definition and have every
    architecture-specific implementation define memchr directly.
    
    Checked on x86_64-linux-gnu and with build-many-glibc.py.
    
    	* string/memchr.c (__memchr): Redefine to memchr.
    	* sysdeps/aarch64/memchr.S (__memchr): Likewise.
    	* sysdeps/aarch64/rawmemchr.S (__memchr): Likewise.
    	* sysdeps/i386/i686/multiarch/memchr-ia32.S (__memchr): Likewise.
    	* sysdeps/i386/memchr.S (__memchr): Likewise.
    	* sysdeps/ia64/memchr.S (__memchr): Likewise.
    	* sysdeps/m68k/memchr.S (__memchr): Likewise.
    	* sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c
    	 (__memchr): Likewise.
    	* sysdeps/powerpc/powerpc32/power7/memchr.S (__memchr): Likewise.
    	* sysdeps/powerpc/powerpc64/power7/memchr.S (__memchr): Likewise.
    	* sysdeps/sparc/sparc32/memchr.S (__memchr): Likewise.
    	* sysdeps/sparc/sparc64/memchr.S (__memchr): Likewise.
    	* sysdeps/tile/tilegx/memchr.c (__memchr): Likewise.
    	* sysdeps/tile/tilepro/memchr.c (__memchr): Likewise.
    	* sysdeps/x86_64/memchr.S (__memchr): Likewise.

diff --git a/string/memchr.c b/string/memchr.c
index c4e21b8..523f9fe 100644
--- a/string/memchr.c
+++ b/string/memchr.c
@@ -35,12 +35,8 @@
 # undef memchr
 #endif
 
-#ifndef weak_alias
-# define __memchr memchr
-#endif
-
 #ifndef MEMCHR
-# define MEMCHR __memchr
+# define MEMCHR memchr
 #endif
 
 /* Search no more than N bytes of S for C.  */
@@ -156,7 +152,4 @@ MEMCHR (void const *s, int c_in, size_t n)
 
   return NULL;
 }
-#ifdef weak_alias
-weak_alias (__memchr, memchr)
-#endif
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/aarch64/memchr.S b/sysdeps/aarch64/memchr.S
index e422aef..08af9fc 100644
--- a/sysdeps/aarch64/memchr.S
+++ b/sysdeps/aarch64/memchr.S
@@ -59,7 +59,7 @@
  * identify exactly which byte has matched.
  */
 
-ENTRY (__memchr)
+ENTRY (memchr)
 	/* Do not dereference srcin if no bytes to compare.  */
 	cbz	cntin, L(zero_length)
 	/*
@@ -152,6 +152,5 @@ L(tail):
 L(zero_length):
 	mov	result, #0
 	ret
-END (__memchr)
-weak_alias (__memchr, memchr)
+END (memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/aarch64/rawmemchr.S b/sysdeps/aarch64/rawmemchr.S
index de3c05f..42f2ec8 100644
--- a/sysdeps/aarch64/rawmemchr.S
+++ b/sysdeps/aarch64/rawmemchr.S
@@ -27,7 +27,7 @@
 ENTRY (__rawmemchr)
 	cbz	w1, L(do_strlen)
 	mov	x2, -1
-	b	__memchr
+	b	memchr
 
 L(do_strlen):
 	mov	x15, x30
diff --git a/sysdeps/i386/i686/multiarch/memchr-ia32.S b/sysdeps/i386/i686/multiarch/memchr-ia32.S
index 9c870e5..ba9dc55 100644
--- a/sysdeps/i386/i686/multiarch/memchr-ia32.S
+++ b/sysdeps/i386/i686/multiarch/memchr-ia32.S
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #if IS_IN (libc)
-# define __memchr __memchr_ia32
+# define memchr __memchr_ia32
 
 # ifdef SHARED
 #  undef libc_hidden_builtin_def
@@ -25,7 +25,7 @@
    they will be called without setting up EBX needed for PLT which is
    used by IFUNC.  */
 #  define libc_hidden_builtin_def(name) \
-	.globl __GI_memchr; __GI_memchr = __memchr
+	.globl __GI_memchr; __GI_memchr = memchr
 # endif
 
 # undef weak_alias
diff --git a/sysdeps/i386/memchr.S b/sysdeps/i386/memchr.S
index ee5960a..953345c 100644
--- a/sysdeps/i386/memchr.S
+++ b/sysdeps/i386/memchr.S
@@ -36,7 +36,7 @@
 #define LEN	CHR+4
 
 	.text
-ENTRY (__memchr)
+ENTRY (memchr)
 
 	/* Save callee-safe registers used in this function.  */
 	pushl %esi
@@ -316,7 +316,6 @@ L(9):	popl %edi		/* pop saved registers */
 	cfi_restore (esi)
 
 	ret
-END (__memchr)
+END (memchr)
 
-weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/ia64/memchr.S b/sysdeps/ia64/memchr.S
index c52fca6..445cee1 100644
--- a/sysdeps/ia64/memchr.S
+++ b/sysdeps/ia64/memchr.S
@@ -54,7 +54,7 @@
 
 #define str		in0
 
-ENTRY(__memchr)
+ENTRY(memchr)
 	.prologue
 	alloc r2 = ar.pfs, 3, 0, 29, 32
 #include "softpipe.h"
@@ -155,7 +155,6 @@ ENTRY(__memchr)
 	adds	ret0 = 8, ret0		// load the next unchecked 8byte
 	br.sptk	.l4;;
 
-END(__memchr)
+END(memchr)
 
-weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/m68k/memchr.S b/sysdeps/m68k/memchr.S
index 89cd8f6..bf9f73a 100644
--- a/sysdeps/m68k/memchr.S
+++ b/sysdeps/m68k/memchr.S
@@ -23,7 +23,7 @@
 #include "asm-syntax.h"
 
 	TEXT
-ENTRY(__memchr)
+ENTRY(memchr)
 	/* Save the callee-saved registers we use.  */
 #ifdef __mcoldfire__
 	movel	R(d2),MEM_PREDEC(sp)
@@ -301,7 +301,6 @@ L(L9:)
 	cfi_restore (R(d4))
 #endif
 	rts
-END(__memchr)
+END(memchr)
 
-weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c b/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c
index 9333b54..2227f0c 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c
@@ -23,12 +23,10 @@
 #undef weak_alias
 #define weak_alias(a, b)
 
-#ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
-  __hidden_ver1(__memchr_ppc, __GI_memchr, __memchr_ppc);
-#endif
-
 extern __typeof (memchr) __memchr_ppc attribute_hidden;
 
 #include <string/memchr.c>
+
+#ifdef SHARED
+__hidden_ver1(__memchr_ppc, __GI_memchr, __memchr_ppc);
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/memchr.S b/sysdeps/powerpc/powerpc32/power7/memchr.S
index ec78709..78aae96 100644
--- a/sysdeps/powerpc/powerpc32/power7/memchr.S
+++ b/sysdeps/powerpc/powerpc32/power7/memchr.S
@@ -21,7 +21,7 @@
 
 /* int [r3] memchr (char *s [r3], int byte [r4], int size [r5])  */
 	.machine  power7
-ENTRY (__memchr)
+ENTRY (memchr)
 	CALL_MCOUNT
 	dcbt	0,r3
 	clrrwi  r8,r3,2
@@ -188,6 +188,5 @@ L(small_range):
 	bne	cr6,L(done)
 	blr
 
-END (__memchr)
-weak_alias (__memchr, memchr)
+END (memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/sparc/sparc32/memchr.S b/sysdeps/sparc/sparc32/memchr.S
index fc8fa3e..340cdef 100644
--- a/sysdeps/sparc/sparc32/memchr.S
+++ b/sysdeps/sparc/sparc32/memchr.S
@@ -31,7 +31,7 @@
 
 	.text
 	.align		4
-ENTRY(__memchr)
+ENTRY(memchr)
 	andcc		%o1, 0xff, %o1
 	sll		%o1, 8, %g6
 	andcc		%o0, 3, %g0
@@ -136,7 +136,6 @@ ENTRY(__memchr)
 	 sub		%o0, 3, %o0
 4:	retl
 	 sub		%o0, 4, %o0
-END(__memchr)
+END(memchr)
 
-weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/sparc/sparc64/memchr.S b/sysdeps/sparc/sparc64/memchr.S
index 1024cb0..ba983ee 100644
--- a/sysdeps/sparc/sparc64/memchr.S
+++ b/sysdeps/sparc/sparc64/memchr.S
@@ -54,7 +54,7 @@
 
 	.text
 	.align		32
-ENTRY(__memchr)
+ENTRY(memchr)
 	and		%o1, 0xff, %o1			/* IEU0		Group		*/
 #ifdef USE_BPR
 	brz,pn		%o2, 12f			/* CTI+IEU1			*/
@@ -254,7 +254,6 @@ ENTRY(__memchr)
 
 23:	retl						/* CTI+IEU1	Group		*/
 	 add		%o0, -1, %o0			/* IEU0				*/
-END(__memchr)
+END(memchr)
 
-weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/tile/memchr.c b/sysdeps/tile/memchr.c
index 96a9f6e..7efd35f 100644
--- a/sysdeps/tile/memchr.c
+++ b/sysdeps/tile/memchr.c
@@ -21,7 +21,7 @@
 #include "string-endian.h"
 
 void *
-__memchr (const void *s, int c, size_t n)
+memchr (const void *s, int c, size_t n)
 {
   const uint64_t *last_word_ptr;
   const uint64_t *p;
@@ -73,5 +73,4 @@ __memchr (const void *s, int c, size_t n)
   ret = ((char *) p) + (CFZ (bits) >> 3);
   return (ret <= last_byte_ptr) ? ret : NULL;
 }
-weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index feef5d4..317dc2d 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -324,7 +324,4 @@ L(return_null):
 	ret
 END(MEMCHR)
 
-#ifndef USE_AS_WMEMCHR
-strong_alias (memchr, __memchr)
 libc_hidden_builtin_def(memchr)
-#endif

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]