while working on an optimized version of strnlen, some string tests fail

Nam-goo Lee devnglee@gmail.com
Wed Dec 14 05:54:00 GMT 2016


> I don't know what "this" is.

I'm sorry that I didn't attach the code. I've attached it this time.

> Benchmark results would be desirable on a range of modern ARM processors

Tested for Cortex-A7 and Cortex-A15.

>  testing should be done for both endiannesses

Done for both endiannesses using QEMU.

> So it's best to get the assignment in place and confirmed accepted
> by the FSF before posting a patch to libc-alpha.

I'm not getting any replies from assign@gnu.org.
I'm planning to send this patch to libc-alpha; is that okay?
-------------- next part --------------
#include <sysdep.h>

#ifdef NO_THUMB

/* Stand-in for the Thumb "cbz" (compare and branch on zero) instruction,
   defined only when NO_THUMB is set: branch to LABEL if REG is zero.

   This clobbers the condition codes, which the real Thumb cbz instruction
   does not do.  But it doesn't matter for any of the uses here.  */
.macro cbz reg, label
	cmp \reg, #0		@ Z is set when the register is zero
	beq \label		@ Taken only in that case
.endm

#endif /* NO_THUMB */

/* size_t __strnlen (const char *str, size_t maxlen)

   Return the number of bytes in STR that precede the first NUL byte,
   or MAXLEN if none of the first MAXLEN bytes is NUL.

   In:       r0 = STR, r1 = MAXLEN
   Out:      r0 = length
   Clobbers: r1-r3 and the condition flags.  r4 and r5 are used on the
	     double-word path only, where they are saved on the stack and
	     restored before returning.

   Strategy: MAXLEN == 0 and STR[0] == 0 return 0 immediately.  For
   MAXLEN < 8 the string is scanned one byte at a time.  Otherwise
   END_PTR = STR + MAXLEN is computed (saturated to 0xffffffff if the
   addition wraps) and the string is scanned one 8-byte-aligned
   double-word at a time, four double-words per loop iteration.  For a
   misaligned STR the first double-word is loaded from the aligned
   address below STR and the bytes that precede STR are forced to 0xff
   so that they can never be mistaken for a NUL.

   NUL detection: UQSUB8 computes, per byte, the saturating difference
   0x01 - byte, which is 0x01 for a zero byte and 0x00 for any other.  */

	.syntax unified			@ it/ite and ldrdcc are unified-syntax forms
	.text
	.balign 64			@ Align the entry point itself, so no padding
					@ sits between the label and the first insn
ENTRY (__strnlen)
	@ r0 : STR
	@ r1 : MAXLEN

	cbz	r1, 99f			@ Check whether MAXLEN is 0
	sfi_breg r0, \
	ldrb	r2, [\B]		@ Reads a cache line
	cbz	r2, 98f			@ Empty string: return 0

	@ No double-word operations if MAXLEN < 8
	cmp	r1, #8
	bcs	.Ldword

	mov	r3, r0			@ Save STR

	@ Byte loop.  The byte at r0 is already known to be non-NUL.
1:	subs	r1, r1, #1		@ Look for more characters?
	beq	2f			@ No: MAXLEN bytes seen, none of them NUL
	sfi_breg r0, \
	ldrb	r2, [\B, #1]!		@ Pre-indexed: r0 advances to the byte read

	@ r0 : Addr of r2
	@ r1 : Remaining number of bytes to search
	@ r2 : Data
	@ r3 : Saved STR

	cmp	r2, #0			@ Is it '\0'?
	bne	1b
	sub	r0, r0, r3		@ Length = address of the NUL - STR
	DO_RET(lr)

	@ MAXLEN exhausted: r0 is the last byte examined, so step past it.
2:	add	r0, r0, #1
	sub	r0, r0, r3		@ Equals MAXLEN
	DO_RET(lr)

	@ Return 0: 98 is entered with r2 == 0, 99 with r1 == 0.
98:	mov	r1, r2
99:	mov	r0, r1
	DO_RET(lr)

.Ldword:
	strd	r4, r5, [sp, #-8]!	@ Free up r4/r5 as scratch registers
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (r4, 0)
	cfi_rel_offset (r5, 4)

	@ Check whether STR + MAXLEN overflows to pass strnlen (s, -1) test.
	adds	r1, r0, r1		@ r1 = END_PTR; carry set if it wrapped
	it	cs
	mvncs	r1, #0			@ Saturate END_PTR to 0xffffffff

	ands	r4, r0, #7		@ r4: offset
	bne	.Lnot_aligned

	mov	r4, r0			@ Save STR
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8	@ First double-word; r0 moves past it

.Lmain_restart:
#ifdef ARCH_HAS_T2
	movw	r5, #0x0101
	movt	r5, #0x0101
#else
	ldr	r5, =0x01010101
#endif

.Lmain_loop:

	@ r0 : (addr of r2) + 8
	@ r1 : END_PTR (string/strnlen.c)
	@ r2 : lower word
	@ r3 : higher word
	@ r4 : saved STR
	@ r5 : 0x01010101

	@ Step 1 of 4.  After the two uqsub8 every byte lane holds 0x01 if the
	@ source byte was NUL and 0x00 otherwise; r3 becomes the OR of both
	@ words while r2 keeps the lower-word result for .Lfound.
	uqsub8	r2, r5, r2
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2
	bne	.Lfound
	cmp	r0, r1			@ Next double-word at or beyond END_PTR?
	bcs	.Lmaxlen
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

	@ Step 2 of 4.
	uqsub8	r2, r5, r2
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2
	bne	.Lfound
	cmp	r0, r1
	bcs	.Lmaxlen
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

	@ Step 3 of 4.
	uqsub8	r2, r5, r2
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2
	bne	.Lfound
	cmp	r0, r1
	bcs	.Lmaxlen
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

	@ Step 4 of 4.  The load is conditional so that the same compare
	@ feeds both the load and the backward branch.
	uqsub8	r2, r5, r2
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2
	bne	.Lfound
	cmp	r0, r1
	it	cc
	sfi_breg r0, \
	ldrdcc	r2, r3, [\B], #8
	bcc	.Lmain_loop

	@ END_PTR reached
.Lmaxlen:
	sub	r0, r1, r4		@ Length = END_PTR - STR

	cfi_remember_state
	sfi_breg sp, \
	ldrd	r4, r5, [\B], #8
	cfi_adjust_cfa_offset (-8)
	cfi_restore (r4)
	cfi_restore (r5)
	DO_RET(lr)
	cfi_restore_state		@ Code below still has r4/r5 on the stack

.Lfound:
	@ Found something.  Disambiguate between first and second words.
	@ Adjust r0 to point to the word containing the match.
	@ Adjust r2 to the found bits for the word containing the match.
	@ On entry r2 holds the lower-word bits and r3 the OR of both words,
	@ so r2 == 0 means the match is in the higher word.
	sub	r0, r0, #4
	cmp	r2, #0
	ite	eq
	moveq	r2, r3
	subne	r0, r0, #4

#ifdef __ARMEL__
	rev	r2, r2			@ Put the lowest-addressed byte on top
#endif

	@ The first NUL lane holds 0x01, so clz yields 8 * index + 7 and the
	@ shift by 3 turns that into the byte index within the word.
	clz	r2, r2
	add	r0, r0, r2, lsr #3

	@ The NUL may lie beyond END_PTR because whole double-words are
	@ examined; clamp so that the result never exceeds MAXLEN.
	cmp	r0, r1
	it	hi
	movhi	r0, r1
	sub	r0, r0, r4

	cfi_remember_state
	sfi_breg sp, \
	ldrd	r4, r5, [\B], #8
	cfi_adjust_cfa_offset (-8)
	cfi_restore (r4)
	cfi_restore (r5)
	DO_RET(lr)
	cfi_restore_state		@ Code below still has r4/r5 on the stack

.Lnot_aligned:

	@ r0 : STR
	@ r1 : END_PTR (check string/strnlen.c)
	@ r4 : offset
	@
	@ The flags set by the cmp below stay live until the final itt block:
	@ none of the instructions in between sets flags.  Carry set means
	@ that STR starts in the higher word of its double-word.

	cmp	r4, #4
	it	cs			@ Starting byte is at r3
	subcs	r4, r4, #4		@ r4 = offset of STR within its word (0..3)
	rsb	r4, r4, #4		@ r4 = bytes of that word from STR onwards
	mov	r5, r4, lsl #3		@ Byte to bit conversion
	mvn	r4, #0
	@ r5 = mask covering the bytes of the word that precede STR.  A shift
	@ count of 32 (STR word-aligned) yields an empty mask.
#ifdef	__ARMEB__
	mov	r5, r4, lsl r5
#else
	mov	r5, r4, lsr r5
#endif
	mov	r4, r0			@ Save STR

	bic	r0, r0, #7		@ Round down to the enclosing double-word
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

	it	cs
	movcs	r2, r3			@ Work on the word that contains STR
	orr	r2, r2, r5		@ Fill in bytes before STR with 0xff
	itt	cs
	movcs	r3, r2			@ Put the masked word back as the higher word
	mvncs	r2, #0			@ Lower word lies wholly before STR

	b	.Lmain_restart

END (__strnlen)

/* Symbol set-up for the routine defined above.
   NOTE(review): libc_hidden_def and weak_alias are not defined in this
   file (presumably they come from the C library's symbol-handling macros).
   Going by their names, strnlen is made a weak alias of __strnlen and both
   names get hidden definitions -- confirm against the macro definitions
   before relying on this.  */
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_def (strnlen)


More information about the Libc-help mailing list