[AArch64] add optimized strchrnul

Richard Earnshaw rearnsha@arm.com
Wed Jun 11 10:45:00 GMT 2014


Hot on the heels of yesterday's strchr patch, here's the implementation
of strchrnul.  This is, in fact, easier to compute than strchr, since we
always return a non-null result, so we don't have to track why we've
finished scanning.

2014-06-11  Richard Earnshaw  <rearnsha@arm.com>

	* libc/machine/aarch64/strchrnul.S: New file.
	* libc/machine/aarch64/strchrnul-stub.c: New file.
	* libc/machine/aarch64/Makefile.am: Add them to build list.
	* libc/machine/aarch64/Makefile.in: Regenerated.

Committed.
-------------- next part --------------
Index: Makefile.am
===================================================================
RCS file: /cvs/src/src/newlib/libc/machine/aarch64/Makefile.am,v
retrieving revision 1.8
diff -u -r1.8 Makefile.am
--- Makefile.am	10 Jun 2014 14:04:31 -0000	1.8
+++ Makefile.am	11 Jun 2014 09:39:27 -0000
@@ -20,6 +20,8 @@
 lib_a_SOURCES += setjmp.S
 lib_a_SOURCES += strchr-stub.c
 lib_a_SOURCES += strchr.S
+lib_a_SOURCES += strchrnul-stub.c
+lib_a_SOURCES += strchrnul.S
 lib_a_SOURCES += strcmp-stub.c
 lib_a_SOURCES += strcmp.S
 lib_a_SOURCES += strlen-stub.c
Index: Makefile.in
===================================================================
RCS file: /cvs/src/src/newlib/libc/machine/aarch64/Makefile.in,v
retrieving revision 1.9
diff -u -r1.9 Makefile.in
--- Makefile.in	10 Jun 2014 14:04:31 -0000	1.9
+++ Makefile.in	11 Jun 2014 09:39:27 -0000
@@ -74,7 +74,8 @@
 	lib_a-memmove-stub.$(OBJEXT) lib_a-memmove.$(OBJEXT) \
 	lib_a-memset-stub.$(OBJEXT) lib_a-memset.$(OBJEXT) \
 	lib_a-setjmp.$(OBJEXT) lib_a-strchr-stub.$(OBJEXT) \
-	lib_a-strchr.$(OBJEXT) lib_a-strcmp-stub.$(OBJEXT) \
+	lib_a-strchr.$(OBJEXT) lib_a-strchrnul-stub.$(OBJEXT) \
+	lib_a-strchrnul.$(OBJEXT) lib_a-strcmp-stub.$(OBJEXT) \
 	lib_a-strcmp.$(OBJEXT) lib_a-strlen-stub.$(OBJEXT) \
 	lib_a-strlen.$(OBJEXT) lib_a-strncmp-stub.$(OBJEXT) \
 	lib_a-strncmp.$(OBJEXT) lib_a-strnlen-stub.$(OBJEXT) \
@@ -206,8 +207,9 @@
 noinst_LIBRARIES = lib.a
 lib_a_SOURCES = memcmp-stub.c memcmp.S memcpy-stub.c memcpy.S \
 	memmove-stub.c memmove.S memset-stub.c memset.S setjmp.S \
-	strchr-stub.c strchr.S strcmp-stub.c strcmp.S strlen-stub.c \
-	strlen.S strncmp-stub.c strncmp.S strnlen-stub.c strnlen.S
+	strchr-stub.c strchr.S strchrnul-stub.c strchrnul.S \
+	strcmp-stub.c strcmp.S strlen-stub.c strlen.S strncmp-stub.c \
+	strncmp.S strnlen-stub.c strnlen.S
 lib_a_CCASFLAGS = $(AM_CCASFLAGS)
 lib_a_CFLAGS = $(AM_CFLAGS)
 ACLOCAL_AMFLAGS = -I ../../.. -I ../../../..
@@ -306,6 +308,12 @@
 lib_a-strchr.obj: strchr.S
 	$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strchr.obj `if test -f 'strchr.S'; then $(CYGPATH_W) 'strchr.S'; else $(CYGPATH_W) '$(srcdir)/strchr.S'; fi`
 
+lib_a-strchrnul.o: strchrnul.S
+	$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strchrnul.o `test -f 'strchrnul.S' || echo '$(srcdir)/'`strchrnul.S
+
+lib_a-strchrnul.obj: strchrnul.S
+	$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strchrnul.obj `if test -f 'strchrnul.S'; then $(CYGPATH_W) 'strchrnul.S'; else $(CYGPATH_W) '$(srcdir)/strchrnul.S'; fi`
+
 lib_a-strcmp.o: strcmp.S
 	$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strcmp.o `test -f 'strcmp.S' || echo '$(srcdir)/'`strcmp.S
 
@@ -366,6 +374,12 @@
 lib_a-strchr-stub.obj: strchr-stub.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strchr-stub.obj `if test -f 'strchr-stub.c'; then $(CYGPATH_W) 'strchr-stub.c'; else $(CYGPATH_W) '$(srcdir)/strchr-stub.c'; fi`
 
+lib_a-strchrnul-stub.o: strchrnul-stub.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strchrnul-stub.o `test -f 'strchrnul-stub.c' || echo '$(srcdir)/'`strchrnul-stub.c
+
+lib_a-strchrnul-stub.obj: strchrnul-stub.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strchrnul-stub.obj `if test -f 'strchrnul-stub.c'; then $(CYGPATH_W) 'strchrnul-stub.c'; else $(CYGPATH_W) '$(srcdir)/strchrnul-stub.c'; fi`
+
 lib_a-strcmp-stub.o: strcmp-stub.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcmp-stub.o `test -f 'strcmp-stub.c' || echo '$(srcdir)/'`strcmp-stub.c
 
Index: strchrnul-stub.c
===================================================================
RCS file: strchrnul-stub.c
diff -N strchrnul-stub.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ strchrnul-stub.c	11 Jun 2014 09:39:27 -0000
@@ -0,0 +1,31 @@
+/* Copyright (c) 2014, ARM Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the company nor the names of its contributors
+         may be used to endorse or promote products derived from this
+         software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
+# include "../../string/strchrnul.c"	/* Building for size: use the C implementation in libc/string.  */
+#else
+/* Otherwise nothing is compiled here; see strchrnul.S for the code.  */
+#endif
Index: strchrnul.S
===================================================================
RCS file: strchrnul.S
diff -N strchrnul.S
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ strchrnul.S	11 Jun 2014 09:39:27 -0000
@@ -0,0 +1,149 @@
+/*
+   strchrnul - find a character or nul in a string
+
+   Copyright (c) 2014, ARM Limited
+   All rights Reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the company nor the names of its contributors
+         may be used to endorse or promote products derived from this
+         software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
+/* See strchrnul-stub.c  */
+#else
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+/* Arguments and results.  */
+#define srcin		x0	/* Pointer to the string to scan.  */
+#define chrin		w1	/* Character to find; only the low byte is used.  */
+
+#define result		x0	/* Returned pointer (same register as srcin).  */
+
+#define src		x2	/* Read pointer, 32-byte aligned, post-incremented.  */
+#define	tmp1		x3	/* Alignment offset, then padding mask / syndrome.  */
+#define wtmp2		w4	/* Holds the magic constant 0x40100401.  */
+#define tmp3		x5	/* Scratch: all-ones, then the unmasked syndrome.  */
+
+#define vrepchr		v0	/* chrin replicated into every byte lane.  */
+#define vdata1		v1	/* First 16 bytes of the current hunk.  */
+#define vdata2		v2	/* Second 16 bytes of the current hunk.  */
+#define vhas_nul1	v3	/* Per-byte mask: vdata1 byte is nul.  */
+#define vhas_nul2	v4	/* Per-byte mask: vdata2 byte is nul.  */
+#define vhas_chr1	v5	/* Per-byte mask: vdata1 byte is chrin (later: or nul).  */
+#define vhas_chr2	v6	/* Per-byte mask: vdata2 byte is chrin (later: or nul).  */
+#define vrepmask	v15	/* Magic constant replicated into every 32-bit lane.  */
+#define vend1		v16	/* Reduced termination check / syndrome.  */
+
+/* Core algorithm.
+
+   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+   two bits per byte (the lowest-addressed byte is always in bits 0 and 1,
+   for both big and little-endian systems).  For each tuple, bit 0 is set
+   iff the relevant byte matched the requested character or nul (bit 1 is
+   unused).  Since the bits in the syndrome reflect exactly the order in
+   which things occur in the original string, a count_trailing_zeros()
+   operation will identify exactly which byte is causing the termination.  */
+
+/* def_fn F [P2ALIGN]: begin global function F in .text, aligned to 2^P2ALIGN bytes.  */
+
+	.macro def_fn f p2align=0
+	.text
+	.p2align \p2align
+	.global \f
+	.type \f, %function
+\f:
+	.endm
+
+def_fn strchrnul
+	/* Return a pointer to the first byte of srcin that equals chrin or
+	   nul.  Magic constant 0x40100401 identifies the matching lane.  */
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16
+	dup	vrepchr.16b, chrin
+	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
+	dup	vrepmask.4s, wtmp2
+	ands	tmp1, srcin, #31	/* Offset of srcin within its hunk.  */
+	b.eq	.Lloop
+
+	/* Input string is not 32-byte aligned.  Rather than forcing
+	   the padding bytes to a safe value, we calculate the syndrome
+	   for all the bytes, but then mask off those bits of the
+	   syndrome that are related to the padding.  */
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	neg	tmp1, tmp1
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	orr	vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
+	orr	vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	lsl	tmp1, tmp1, #1		// -(2 * offset): two syndrome bits per byte.
+	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
+	mov	tmp3, #~0
+	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
+	lsr	tmp1, tmp3, tmp1	// Shift is mod 64: low 2 * offset bits set.
+
+	mov	tmp3, vend1.d[0]
+	bic	tmp1, tmp3, tmp1	// Mask padding bits.
+	cbnz	tmp1, .Ltail
+
+.Lloop:
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	/* Use a fast check for the termination condition.  */
+	orr	vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
+	orr	vend1.16b, vhas_chr1.16b, vhas_chr2.16b
+	addp	vend1.2d, vend1.2d, vend1.2d
+	mov	tmp1, vend1.d[0]
+	cbz	tmp1, .Lloop
+
+	/* Termination condition found.  Now need to establish exactly where
+	   we terminated.  */
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b		// 256->128
+	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
+
+	mov	tmp1, vend1.d[0]
+.Ltail:
+	/* Count the trailing zeros, by bit reversing...  */
+	rbit	tmp1, tmp1
+	/* Re-bias source.  */
+	sub	src, src, #32
+	clz	tmp1, tmp1	/* ... and counting the leading zeros.  */
+	/* tmp1 is twice the offset into the fragment.  */
+	add	result, src, tmp1, lsr #1
+	ret
+
+	.size	strchrnul, . - strchrnul
+#endif


More information about the Newlib mailing list