while working on an optimized version of strnlen, some string tests fail
Nam-goo Lee
devnglee@gmail.com
Wed Dec 14 05:54:00 GMT 2016
> I don't know what "this" is.
I'm sorry that I didn't attach the code. I've attached it this time.
> Benchmark results would be desirable on a range of modern ARM processors
Tested for Cortex-A7 and Cortex-A15.
> testing should be done for both endiannesses
Done for both endiannesses using QEMU.
> So it's best to get the assignment in place and confirmed accepted
> by the FSF before posting a patch to libc-alpha.
I'm not getting any replies from assign@gnu.org.
I'm planning to send this patch to libc-alpha, is it okay?
-------------- next part --------------
#include <sysdep.h>
#ifdef NO_THUMB
/* When NO_THUMB is defined, provide "cbz" as a macro built from a
   compare against zero followed by a conditional branch.  Unlike the
   real Thumb cbz instruction, this version overwrites the condition
   codes; none of the uses in this file depend on the flags surviving.  */
	.macro cbz rn, target
	cmp	\rn, #0
	beq	\target
	.endm
#endif
/* size_t __strnlen (const char *str, size_t maxlen)

   In:   r0 = STR, r1 = MAXLEN
   Out:  r0 = number of bytes before the first NUL in STR, never more
	 than MAXLEN
   Uses: r1-r3 and the condition flags as scratch.  r4 and r5 are pushed
	 and popped again on the doubleword path only.

   MAXLEN < 8 is handled one byte at a time.  Otherwise STR is scanned
   in 8-byte aligned doublewords.  For each word, UQSUB8 of 0x01010101
   minus the data leaves 0x01 in every byte lane that held a NUL and
   0x00 elsewhere, so a non-zero result means a terminator was seen.
   When STR is not 8-byte aligned, the bytes in front of it inside the
   first doubleword are forced to 0xff so they can never match.  */

	@ Align the entry point itself.  The alignment directive must come
	@ before ENTRY, otherwise the padding is emitted after the function
	@ label and is executed on every call.
	.p2align 6
ENTRY (__strnlen)
	@ r0 : STR
	@ r1 : MAXLEN
	cbz	r1, 99f			@ MAXLEN == 0: return 0
	@ Load the first byte of STR.
	sfi_breg r0, \
	ldrb	r2, [\B]
	cbz	r2, 98f			@ Empty string: return 0

	@ No double-word operations if MAXLEN < 8
	cmp	r1, #8
	bcs	.Ldword

	@ Byte-at-a-time path.  Byte 0 is already known to be non-zero.
	mov	r3, r0			@ Save STR
1:	subs	r1, r1, #1		@ Look for more characters?
	beq	2f			@ MAXLEN bytes examined, none was NUL
	sfi_breg r0, \
	ldrb	r2, [\B, #1]!
	@ r0 : Addr of r2
	@ r1 : Remaining number of bytes to search
	@ r2 : Data
	@ r3 : Saved STR
	cmp	r2, #0			@ Is it '\0'?
	bne	1b
	sub	r0, r0, r3		@ Length = address of NUL - STR
	DO_RET(lr)

	@ Limit reached: r0 addresses the last byte examined, so the
	@ result is (r0 + 1) - STR, which equals MAXLEN.
2:	add	r0, r0, #1
	sub	r0, r0, r3
	DO_RET(lr)

	@ Return 0.  Entry 98 arrives with r2 == 0, entry 99 with r1 == 0.
98:	mov	r1, r2
99:	mov	r0, r1
	DO_RET(lr)

.Ldword:
	strd	r4, r5, [sp, #-8]!
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (r4, 0)
	cfi_rel_offset (r5, 4)

	@ Turn MAXLEN into an end pointer.  If STR + MAXLEN wraps around
	@ (for example strnlen (s, -1)), clamp it to 0xffffffff.
	adds	r1, r0, r1
	it	cs
	mvncs	r1, #0

	ands	r4, r0, #7		@ r4: offset of STR within its doubleword
	bne	.Lnot_aligned
	mov	r4, r0			@ Save STR
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

.Lmain_restart:
	@ r5 = 0x01010101, the minuend for the per-byte NUL test.
#ifdef ARCH_HAS_T2
	movw	r5, #0x0101
	movt	r5, #0x0101
#else
	ldr	r5, =0x01010101
#endif

.Lmain_loop:
	@ r0 : (addr of r2) + 8
	@ r1 : END_PTR (string/strnlen.c)
	@ r2 : lower word
	@ r3 : higher word
	@ r4 : saved STR
	@ r5 : 0x01010101
	@
	@ The loop body is unrolled four times.  Each step tests the
	@ current doubleword for a NUL, then stops if the next doubleword
	@ would start at or beyond END_PTR, and otherwise loads it.
	uqsub8	r2, r5, r2		@ 0x01 in each lane that held NUL
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2		@ Any NUL in these 8 bytes?
	bne	.Lfound
	cmp	r0, r1
	bcs	.Lmaxlen
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

	uqsub8	r2, r5, r2
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2
	bne	.Lfound
	cmp	r0, r1
	bcs	.Lmaxlen
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

	uqsub8	r2, r5, r2
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2
	bne	.Lfound
	cmp	r0, r1
	bcs	.Lmaxlen
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8

	uqsub8	r2, r5, r2
	uqsub8	r3, r5, r3
	orrs	r3, r3, r2
	bne	.Lfound
	@ Last step: load the next doubleword and loop only while still
	@ below END_PTR; otherwise fall through to .Lmaxlen.
	cmp	r0, r1
	it	cc
	sfi_breg r0, \
	ldrdcc	r2, r3, [\B], #8
	bcc	.Lmain_loop

	@ END_PTR reached
.Lmaxlen:
	sub	r0, r1, r4		@ Result = END_PTR - STR
	cfi_remember_state
	sfi_breg sp, \
	ldrd	r4, r5, [\B], #8
	cfi_adjust_cfa_offset (-8)
	cfi_restore (r4)
	cfi_restore (r5)
	DO_RET(lr)
	cfi_restore_state

.Lfound:
	@ Found something. Disambiguate between first and second words.
	@ Adjust r0 to point to the word containing the match.
	@ Adjust r2 to the found bits for the word containing the match.
	@ On entry r3 holds the OR of both match words, so it equals the
	@ higher-word match bits whenever r2 is zero.
	sub	r0, r0, #4		@ r0 -> higher word
	cmp	r2, #0
	ite	eq
	moveq	r2, r3			@ No match in lower word: use higher word
	subne	r0, r0, #4		@ Match in lower word: r0 -> lower word
#ifdef __ARMEL__
	@ Little-endian keeps the first string byte in the least
	@ significant lane; reverse so that clz finds the first match.
	rev	r2, r2
#endif
	clz	r2, r2
	add	r0, r0, r2, lsr #3	@ Leading zero bits / 8 = byte index
	@ A NUL beyond END_PTR does not count: cap the address at END_PTR.
	cmp	r0, r1
	it	hi
	movhi	r0, r1
	sub	r0, r0, r4
	cfi_remember_state
	sfi_breg sp, \
	ldrd	r4, r5, [\B], #8
	cfi_adjust_cfa_offset (-8)
	cfi_restore (r4)
	cfi_restore (r5)
	DO_RET(lr)
	cfi_restore_state

.Lnot_aligned:
	@ r0 : STR
	@ r1 : END_PTR (check string/strnlen.c)
	@ r4 : offset
	@
	@ The carry flag set by the cmp below is consumed by the cs
	@ conditionals further down, so nothing in between may alter
	@ the flags.
	cmp	r4, #4
	it	cs			@ Starting byte is at r3
	subcs	r4, r4, #4		@ r4 = offset within the starting word
	rsb	r4, r4, #4		@ r4 = bytes from STR to end of that word
	mov	r5, r4, lsl #3		@ Byte to bit conversion
	mvn	r4, #0
	@ r5 = mask with 0xff in each lane that precedes STR in its word.
#ifdef __ARMEB__
	mov	r5, r4, lsl r5
#else
	mov	r5, r4, lsr r5
#endif
	mov	r4, r0			@ Save STR
	bic	r0, r0, #7		@ Round down to the doubleword boundary
	sfi_breg r0, \
	ldrd	r2, r3, [\B], #8
	it	cs
	movcs	r2, r3			@ STR starts in the higher word
	orr	r2, r2, r5		@ Fill in bytes before STR with 0xff
	itt	cs
	movcs	r3, r2			@ Masked word goes back to the higher slot
	mvncs	r2, #0			@ Whole lower word precedes STR
	b	.Lmain_restart
END (__strnlen)
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_def (strnlen)
More information about the Libc-help
mailing list