From cae28869c106eb342dd5a1c8242f933efab6f772 Mon Sep 17 00:00:00 2001 From: Jeff Johnston Date: Mon, 26 May 2008 22:56:14 +0000 Subject: [PATCH] 2008-05-26 Eric Blake Optimize the generic and x86 strlen. * libc/string/strlen.c (strlen) [!__OPTIMIZE_SIZE__]: Pre-align data so unaligned searches aren't penalized. * libc/machine/i386/strlen.S (strlen) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches. --- newlib/ChangeLog | 8 ++++ newlib/libc/machine/i386/strlen.S | 65 ++++++++++++++++++++++++++++++- newlib/libc/string/strlen.c | 42 ++++++++++---------- 3 files changed, 93 insertions(+), 22 deletions(-) diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 652be51c7..74fe2fd4d 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,11 @@ +2008-05-26 Eric Blake + + Optimize the generic and x86 strlen. + * libc/string/strlen.c (strlen) [!__OPTIMIZE_SIZE__]: Pre-align + data so unaligned searches aren't penalized. + * libc/machine/i386/strlen.S (strlen) [!__OPTIMIZE_SIZE__]: + Word operations are faster than repnz byte searches. + 2008-05-23 Corinna Vinschen * libc/include/sys/_default_fcntl.h: Include on Cygwin. diff --git a/newlib/libc/machine/i386/strlen.S b/newlib/libc/machine/i386/strlen.S index 459b3a959..0e3cb640c 100644 --- a/newlib/libc/machine/i386/strlen.S +++ b/newlib/libc/machine/i386/strlen.S @@ -1,6 +1,6 @@ /* * ==================================================== - * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved. + * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved. * * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice @@ -20,12 +20,75 @@ SYM (strlen): pushl edi movl 8(ebp),edx +#ifdef __OPTIMIZE_SIZE__ cld movl edx,edi movl $4294967295,ecx xor eax,eax repnz scasb +#else +/* Modern x86 hardware is much faster at double-word + manipulation than with bytewise repnz scasb. */ + +/* Do byte-wise checks until string is aligned. 
*/ + movl edx,edi + test $3,edi + je L5 + movb (edi),cl + incl edi + testb cl,cl + je L15 + + test $3,edi + je L5 + movb (edi),cl + incl edi + testb cl,cl + je L15 + + test $3,edi + je L5 + movb (edi),cl + incl edi + testb cl,cl + je L15 + +L5: + subl $4,edi + +/* loop performing 4 byte mask checking for desired 0 byte */ + .p2align 4,,7 +L10: + addl $4,edi + movl (edi),ecx + leal -16843009(ecx),eax + notl ecx + andl ecx,eax + testl $-2139062144,eax + je L10 + +/* Find which of four bytes is 0. */ + notl ecx + incl edi + + testb cl,cl + je L15 + incl edi + shrl $8,ecx + + testb cl,cl + je L15 + incl edi + shrl $8,ecx + + testb cl,cl + je L15 + incl edi + +#endif + +L15: subl edx,edi leal -1(edi),eax diff --git a/newlib/libc/string/strlen.c b/newlib/libc/string/strlen.c index 4249e14c7..a796d2738 100644 --- a/newlib/libc/string/strlen.c +++ b/newlib/libc/string/strlen.c @@ -1,7 +1,7 @@ -/* +/* FUNCTION <<strlen>>---character string length - + INDEX strlen @@ -57,32 +57,32 @@ size_t _DEFUN (strlen, (str), _CONST char *str) { -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) _CONST char *start = str; - while (*str) - str++; - - return str - start; -#else - _CONST char *start = str; +#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) unsigned long *aligned_addr; - if (!UNALIGNED (str)) + /* Align the pointer, so we can search a word at a time. */ + while (UNALIGNED (str)) { - /* If the string is word-aligned, we can check for the presence of - a null in each word-sized block. */ - aligned_addr = (unsigned long*)str; - while (!DETECTNULL (*aligned_addr)) - aligned_addr++; - - /* Once a null is detected, we check each byte in that block for a - precise position of the null. */ - str = (char*)aligned_addr; + if (!*str) + return str - start; + str++; } - + + /* If the string is word-aligned, we can check for the presence of + a null in each word-sized block. 
*/ + aligned_addr = (unsigned long *)str; + while (!DETECTNULL (*aligned_addr)) + aligned_addr++; + + /* Once a null is detected, we check each byte in that block for a + precise position of the null. */ + str = (char *) aligned_addr; + +#endif /* not PREFER_SIZE_OVER_SPEED */ + while (*str) str++; return str - start; -#endif /* not PREFER_SIZE_OVER_SPEED */ } -- 2.43.5