This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch, master, updated. glibc-2.11-255-g462a522
- From: davem at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 9 Mar 2010 14:42:30 -0000
- Subject: GNU C Library master sources branch, master, updated. glibc-2.11-255-g462a522
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master, has been updated
via 462a5227b0d3220ab68f65272bd5b9d6d4f49b1f (commit)
from 2fe000dfd673859eb3b1e1c9739de66445d9fe08 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=462a5227b0d3220ab68f65272bd5b9d6d4f49b1f
commit 462a5227b0d3220ab68f65272bd5b9d6d4f49b1f
Author: David S. Miller <davem@davemloft.net>
Date: Tue Mar 9 06:42:53 2010 -0800
sparc: Optimize strlen using techniques from powerpc implementation.
diff --git a/ChangeLog b/ChangeLog
index afb549b..7e5e7bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,11 @@
* sysdeps/sparc/sparc32/memcpy.S: Fix build.
+ * sysdeps/sparc/sparc32/strlen.S: Optimize.
+ * sysdeps/sparc/sparc64/strlen.S: Likewise.
+ * sysdeps/sparc/sparc32/sparcv9/strlen.S (ASI_PNF, ASI_BLK_P,
+ XCC): Delete definitions, not needed.
+
2010-03-07 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/sysv/linux/internal_statvfs.c (__statvfs_getflags):
diff --git a/sysdeps/sparc/sparc32/sparcv9/strlen.S b/sysdeps/sparc/sparc32/sparcv9/strlen.S
index b8f4dba..28a216c 100644
--- a/sysdeps/sparc/sparc32/sparcv9/strlen.S
+++ b/sysdeps/sparc/sparc32/sparcv9/strlen.S
@@ -1,4 +1 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
#include <sparc64/strlen.S>
diff --git a/sysdeps/sparc/sparc32/strlen.S b/sysdeps/sparc/sparc32/strlen.S
index ed92f20..2945bb5 100644
--- a/sysdeps/sparc/sparc32/strlen.S
+++ b/sysdeps/sparc/sparc32/strlen.S
@@ -1,8 +1,9 @@
/* Determine the length of a string.
For SPARC v7.
- Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1999, 2003, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz>.
+ Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
+ David S. Miller <davem@davemloft.net>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -21,86 +22,55 @@
#include <sysdep.h>
- /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x01010101) & (~xword) & 0x80808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
.text
.align 4
ENTRY(strlen)
- mov %o0, %o1
- andcc %o0, 3, %g0
- be 20f
- sethi %hi(0x80808080), %o4
-
- ldub [%o0], %o5
- cmp %o5, 0
- be 21f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o0], %o5
- cmp %o5, 0
- be 22f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o4
- ldub [%o0], %o5
- cmp %o5, 0
- be 23f
- add %o0, 1, %o0
- b 11f
- or %o4, %lo(0x01010101), %o2
-21: retl
- mov 0, %o0
-22: retl
- mov 1, %o0
-23: retl
- mov 2, %o0
-
-20: or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o4
-5: or %o4, %lo(0x01010101), %o2
-11: ld [%o0], %o5
-12: sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- andcc %o4, %o3, %g0
- be 11b
- add %o0, 4, %o0
-
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 13f
- add %o0, -4, %o4
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 13f
- add %o4, 1, %o4
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be 13f
- add %o4, 1, %o4
- andcc %o5, 0xff, %g0
- bne,a 12b
- ld [%o0], %o5
- add %o4, 1, %o4
-13: retl
- sub %o4, %o1, %o0
+ mov %o0, %o1
+ andn %o0, 0x3, %o0
+
+ ld [%o0], %o5
+ and %o1, 0x3, %g1
+ mov -1, %g5
+
+ sethi %hi(0x01010101), %o2
+ sll %g1, 3, %g1
+
+ or %o2, %lo(0x01010101), %o2
+ srl %g5, %g1, %g2
+
+ orn %o5, %g2, %o5
+ sll %o2, 7, %o3
+10: add %o0, 4, %o0
+
+ andn %o3, %o5, %g1
+ sub %o5, %o2, %g2
+
+ andcc %g1, %g2, %g0
+ be,a 10b
+ ld [%o0], %o5
+
+ srl %o5, 24, %g1
+
+ andcc %g1, 0xff, %g0
+ be 90f
+ sub %o0, 4, %o0
+
+ srl %o5, 16, %g2
+
+ andcc %g2, 0xff, %g0
+ be 90f
+ add %o0, 1, %o0
+
+ srl %o5, 8, %g1
+
+ andcc %g1, 0xff, %g0
+ be 90f
+ add %o0, 1, %o0
+
+ add %o0, 1, %o0
+
+90: retl
+ sub %o0, %o1, %o0
END(strlen)
libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/sparc/sparc64/strlen.S b/sysdeps/sparc/sparc64/strlen.S
index cc15e4e..64350fb 100644
--- a/sysdeps/sparc/sparc64/strlen.S
+++ b/sysdeps/sparc/sparc64/strlen.S
@@ -1,8 +1,9 @@
/* Determine the length of a string. For SPARC v9.
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1998, 1999, 2003, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
+ Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>,
+ Jakub Jelinek <jj@ultra.linux.cz>, and
+ David S. Miller <davem@davemloft.net>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -20,155 +21,66 @@
02111-1307 USA. */
#include <sysdep.h>
-#include <asm/asi.h>
-
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
+
+ .register %g2, #scratch
+ .register %g3, #scratch
.text
.align 32
ENTRY(strlen)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- ldub [%o0], %o3 /* Load */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
- mov %o0, %o1 /* IEU1 */
-
- sllx %g1, 32, %g4 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
- or %g1, %g4, %g1 /* IEU0 Group */
- brz,pn %o3, 13f /* CTI+IEU1 */
-
- sllx %g1, 7, %g4 /* IEU0 Group */
- bne,a,pn %icc, 15f /* CTI */
- add %o0, 1, %o0 /* IEU1 */
- /* %g1 = 0x0101010101010101 *
- * %g4 = 0x8080808080808080 *
- * %o0 = string pointer *
- * %o1 = start of string */
-1: ldx [%o0], %o3 /* Load Group */
-
- add %o0, 8, %o0 /* IEU1 */
-2: sub %o3, %g1, %o2 /* IEU0 Group */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %o3, %o5 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o5, %g4, %g0 /* IEU1 Group */
-#else
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g4, %g0 /* IEU1 Group */
-#endif
-
- be,pt %xcc, 2b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g5 /* IEU1 Group */
-#ifdef EIGHTBIT_NOT_RARE
- srlx %o5, 32, %o5 /* IEU0 */
-
-3: andcc %o5, %g4, %g0 /* IEU1 Group */
-#else
- srlx %o2, 32, %o2 /* IEU0 */
-
-3: andcc %o2, %g4, %g0 /* IEU1 Group */
-#endif
- be,pn %xcc, 4f /* CTI */
- srlx %g5, 56, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 12f /* CTI */
- srlx %g5, 48, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 11f /* CTI */
-
- srlx %g5, 40, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 10f /* CTI */
- srlx %g5, 32, %o2 /* IEU0 */
-
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 9f /* CTI */
-4: srlx %g5, 24, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 8f /* CTI */
- srlx %g5, 16, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
-
- srlx %g5, 8, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 6f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g4, %g0 /* IEU1 Group */
-
- be,pt %xcc, 2b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g5 /* IEU1 Group */
- ba,pt %xcc, 3b /* CTI */
-
- srlx %o2, 32, %o2 /* IEU0 */
-5: add %o0, -9, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-
-6: add %o0, -10, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-7: add %o0, -11, %o0 /* IEU0 Group */
-
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-8: add %o0, -12, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
-
- sub %o0, %o1, %o0 /* IEU0 */
-9: add %o0, -13, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-
-10: add %o0, -14, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-11: add %o0, -15, %o0 /* IEU0 Group */
-
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-12: add %o0, -16, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
-
- sub %o0, %o1, %o0 /* IEU0 */
-13: retl /* CTI+IEU1 Group */
- mov 0, %o0 /* IEU0 */
- nop
-
-15: ldub [%o0], %o3 /* Load Group */
-16: andcc %o0, 7, %g0 /* IEU1 */
- be,pn %icc, 1b /* CTI */
- nop /* IEU0 Group */
-
- add %o0, 1, %o0 /* IEU1 */
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- bne,a,pt %icc, 16b /* CTI */
- lduba [%o0] ASI_PNF, %o3 /* Load */
-
- add %o0, -1, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
+ mov %o0, %o1
+ andn %o0, 0x7, %o0
+
+ ldx [%o0], %o5
+ and %o1, 0x7, %g1
+ mov -1, %g5
+
+ sethi %hi(0x01010101), %o2
+ sll %g1, 3, %g1
+
+ or %o2, %lo(0x01010101), %o2
+ srlx %g5, %g1, %o3
+
+ sllx %o2, 32, %g1
+ sethi %hi(0x0000ff00), %g5
+
+ orn %o5, %o3, %o5
+ or %o2, %g1, %o2
+
+ sllx %o2, 7, %o3
+10: add %o0, 8, %o0
+
+ andn %o3, %o5, %g1
+ sub %o5, %o2, %g2
+
+ andcc %g1, %g2, %g0
+ be,a,pt %xcc, 10b
+ ldx [%o0], %o5
+ srlx %o5, 32, %g1
+
+ andn %o3, %g1, %o4
+ sub %g1, %o2, %g2
+
+ add %o0, 4, %g3
+ andcc %o4, %g2, %g0
+ movne %icc, %g1, %o5
+
+ move %icc, %g3, %o0
+ or %g5, %lo(0x0000ff00), %g5
+ mov 3 - 8, %g2
+
+ andcc %o5, %g5, %g0
+ srlx %o5, 16, %g1
+ move %icc, 2 - 8, %g2
+
+ andcc %g1, 0xff, %g0
+ srl %o5, 24, %o5
+ move %icc, 1 - 8, %g2
+
+ movrz %o5, 0 - 8, %g2
+ sub %o0, %o1, %o0
+
+ retl
+ add %o0, %g2, %o0
END(strlen)
libc_hidden_builtin_def (strlen)
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 5 +
sysdeps/sparc/sparc32/sparcv9/strlen.S | 3 -
sysdeps/sparc/sparc32/strlen.S | 128 ++++++++------------
sysdeps/sparc/sparc64/strlen.S | 210 +++++++++----------------------
4 files changed, 115 insertions(+), 231 deletions(-)
hooks/post-receive
--
GNU C Library master sources