From: Ulrich Drepper Date: Mon, 14 Nov 2011 23:24:35 +0000 (-0500) Subject: Add SSE4.2 support for strcasecmp and strncasecmp on x86-32 X-Git-Tag: glibc-2.15~87^2 X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=6abf346582ba678f4850a88b4a5950593841df1d;p=glibc.git Add SSE4.2 support for strcasecmp and strncasecmp on x86-32 --- diff --git a/ChangeLog b/ChangeLog index 739105f553..d5889fc5b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2011-11-14 Ulrich Drepper + + * sysdeps/i386/i686/multiarch/Makefile [subdir=string] + (sysdep_routines): Add strcasecmp_l-sse4 and strncase_l-sse4. + * sysdeps/i386/i686/multiarch/strcasecmp.S: Re-enable SSE4.2 code. + * sysdeps/i386/i686/multiarch/strcmp.S: Likewise. + * sysdeps/i386/i686/multiarch/strncase.S: Likewise. + * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Change to allow reuse + to compile strcasecmp and strncasecmp. + * sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S: New file. + * sysdeps/i386/i686/multiarch/strncase_l-sse4.S: New file. + + * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Use L macro consistently. 
+ 2011-11-13 Ulrich Drepper * sysdeps/i386/i686/multiarch/Makefile [subdir=string]: Add diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 05bd65f463..426b718e47 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -21,7 +21,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ rawmemchr-sse2 rawmemchr-sse2-bsf \ strnlen-sse2 strnlen-c \ strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \ - strncase_l-c strncase-c strncase_l-ssse3 + strncase_l-c strncase-c strncase_l-ssse3 \ + strcasecmp_l-sse4 strncase_l-sse4 ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c CFLAGS-varshift.c += -msse4 diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S index 55f10ba6af..97603d884b 100644 --- a/sysdeps/i386/i686/multiarch/strcasecmp.S +++ b/sysdeps/i386/i686/multiarch/strcasecmp.S @@ -36,12 +36,9 @@ ENTRY(__strcasecmp) testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __strcasecmp_ssse3@GOTOFF(%ebx), %eax -#if 0 - // XXX Temporarily testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx) jz 2f leal __strcasecmp_sse4_2@GOTOFF(%ebx), %eax -#endif 2: popl %ebx cfi_adjust_cfa_offset (-4) cfi_restore (ebx) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S new file mode 100644 index 0000000000..411d4153f2 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S @@ -0,0 +1,2 @@ +#define USE_AS_STRCASECMP_L 1 +#include "strcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S index 0de0a113c0..1df63e3156 100644 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S @@ -1,5 +1,5 @@ /* strcmp with SSE4.2 - Copyright (C) 2010 Free Software Foundation, Inc. 
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -34,33 +34,156 @@ #define PUSH(REG) pushl REG; CFI_PUSH (REG) #define POP(REG) popl REG; CFI_POP (REG) -#ifndef USE_AS_STRNCMP +#ifdef USE_AS_STRNCMP # ifndef STRCMP -# define STRCMP __strcmp_sse4_2 +# define STRCMP __strncmp_sse4_2 # endif -# define STR1 4 +# define STR1 8 # define STR2 STR1+4 -# define RETURN ret; .p2align 4 -#else +# define CNT STR2+4 +# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) +# define REM %ebp +#elif defined USE_AS_STRCASECMP_L +# include "locale-defines.h" # ifndef STRCMP -# define STRCMP __strncmp_sse4_2 +# define STRCMP __strcasecmp_l_sse4_2 # endif -# define STR1 8 +# define STR1 12 +# define STR2 STR1+4 +# define LOCALE 12 /* Loaded before the adjustment. */ +# ifdef PIC +# define RETURN POP (%edi); POP (%ebx); ret; \ + .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) +# else +# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) +# endif +# define NONASCII __strcasecmp_nonascii +#elif defined USE_AS_STRNCASECMP_L +# include "locale-defines.h" +# ifndef STRCMP +# define STRCMP __strncasecmp_l_sse4_2 +# endif +# define STR1 16 # define STR2 STR1+4 # define CNT STR2+4 -# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp) +# define LOCALE 16 /* Loaded before the adjustment. 
*/ +# ifdef PIC +# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ + .p2align 4; \ + CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) +# else +# define RETURN POP (%edi); POP (REM); ret; \ + .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) +# endif +# define REM %ebp +# define NONASCII __strncasecmp_nonascii +#else +# ifndef STRCMP +# define STRCMP __strcmp_sse4_2 +# endif +# define STR1 4 +# define STR2 STR1+4 +# define RETURN ret; .p2align 4 #endif .section .text.sse4.2,"ax",@progbits -ENTRY (STRCMP) -#ifdef USE_AS_STRNCMP - PUSH (%ebp) + +#ifdef USE_AS_STRCASECMP_L +ENTRY (__strcasecmp_sse4_2) +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# else + movl __libc_tsd_LOCALE@NTPOFF, %eax +# endif + movl %gs:(%eax), %eax +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne __strcasecmp_nonascii + jmp L(ascii) +END (__strcasecmp_sse4_2) +#endif + +#ifdef USE_AS_STRNCASECMP_L +ENTRY (__strncasecmp_sse4_2) +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# else + movl __libc_tsd_LOCALE@NTPOFF, %eax +# endif + movl %gs:(%eax), %eax +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne __strncasecmp_nonascii + jmp L(ascii) +END (__strncasecmp_sse4_2) +#endif + + ENTRY (STRCMP) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movl LOCALE(%esp), %eax +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + 
jne NONASCII + +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx +# endif +L(ascii): + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +.Lbelowupper: + .quad 0x4040404040404040 + .quad 0x4040404040404040 +.Ltopupper: + .quad 0x5b5b5b5b5b5b5b5b + .quad 0x5b5b5b5b5b5b5b5b +.Ltouppermask: + .quad 0x2020202020202020 + .quad 0x2020202020202020 + .previous + +# ifdef PIC +# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) +# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) +# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) +# else +# define UCLOW_reg .Lbelowupper +# define UCHIGH_reg .Ltopupper +# define LCQWORD_reg .Ltouppermask +# endif +#endif + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + PUSH (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + PUSH (%edi) #endif mov STR1(%esp), %edx mov STR2(%esp), %eax -#ifdef USE_AS_STRNCMP - movl CNT(%esp), %ebp - test %ebp, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + movl CNT(%esp), REM + test REM, REM je L(eq) #endif mov %dx, %cx @@ -72,10 +195,40 @@ ENTRY (STRCMP) and $0xfff, %ecx cmp $0xff0, %ecx ja L(first4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define TOLOWER(reg1, reg2) \ + movdqa reg1, %xmm3; \ + movdqa UCHIGH_reg, %xmm4; \ + movdqa reg2, %xmm5; \ + movdqa UCHIGH_reg, %xmm6; \ + pcmpgtb UCLOW_reg, %xmm3; \ + pcmpgtb reg1, %xmm4; \ + pcmpgtb UCLOW_reg, %xmm5; \ + pcmpgtb reg2, %xmm6; \ + pand %xmm4, %xmm3; \ + pand %xmm6, %xmm5; \ + pand LCQWORD_reg, %xmm3; \ + pand LCQWORD_reg, %xmm5; \ + por %xmm3, reg1; \ + por %xmm5, reg2 + + movdqu (%eax), %xmm1 + TOLOWER (%xmm2, %xmm1) + movd %xmm2, %ecx + movd %xmm1, %edi + movdqa %xmm2, %xmm3 + movdqa %xmm1, %xmm4 + cmpl %edi, %ecx +#else +# define TOLOWER(reg1, reg) + movd %xmm2, %ecx cmp (%eax), %ecx +#endif jne L(less4bytes) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L movdqu (%eax), %xmm1 +#endif pxor %xmm2, %xmm1 pxor 
%xmm0, %xmm0 ptest %xmm1, %xmm0 @@ -84,113 +237,210 @@ ENTRY (STRCMP) ptest %xmm2, %xmm0 jnc L(less16bytes) -#ifdef USE_AS_STRNCMP - sub $16, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $16, REM jbe L(eq) #endif add $16, %edx add $16, %eax L(first4bytes): movzbl (%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl (%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, (%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM je L(eq) #endif movzbl 1(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 1(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 1(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $2, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM je L(eq) #endif movzbl 2(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 2(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 2(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $3, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM 
je L(eq) #endif movzbl 3(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 3(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 3(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM je L(eq) #endif movzbl 4(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 4(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 4(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $5, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM je L(eq) #endif movzbl 5(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 5(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 5(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $6, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM je L(eq) #endif movzbl 6(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 6(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# 
else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 6(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $7, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM je L(eq) #endif movzbl 7(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 7(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 7(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - sub $8, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $8, REM je L(eq) #endif add $8, %eax add $8, %edx - PUSH (%ebx) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L PUSH (%edi) +#endif PUSH (%esi) -#ifdef USE_AS_STRNCMP +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cfi_remember_state #endif mov %edx, %edi mov %eax, %esi xorl %eax, %eax L(check_offset): - movl %edi, %ebx + movl %edi, %edx movl %esi, %ecx - andl $0xfff, %ebx + andl $0xfff, %edx andl $0xfff, %ecx - cmpl %ebx, %ecx - cmovl %ebx, %ecx + cmpl %edx, %ecx + cmovl %edx, %ecx lea -0xff0(%ecx), %edx sub %edx, %edi sub %edx, %esi @@ -199,11 +449,12 @@ L(check_offset): L(loop): movdqu (%esi,%edx), %xmm2 movdqu (%edi,%edx), %xmm1 + TOLOWER (%xmm2, %xmm1) pcmpistri $0x1a, %xmm2, %xmm1 jbe L(end) -#ifdef USE_AS_STRNCMP - sub $16, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $16, REM jbe L(more16byteseq) #endif @@ -211,13 +462,22 @@ L(loop): jle L(loop) L(crosspage): movzbl (%edi,%edx), %eax - movzbl (%esi,%edx), %ebx - subl %ebx, %eax + movzbl (%esi,%edx), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + 
movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx +# endif +#endif + subl %ecx, %eax jne L(ret) - testl %ebx, %ebx + testl %ecx, %ecx je L(ret) -#ifdef USE_AS_STRNCMP - sub $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $1, REM jbe L(more16byteseq) #endif inc %edx @@ -230,30 +490,44 @@ L(crosspage): .p2align 4 L(end): jnc L(ret) -#ifdef USE_AS_STRNCMP - sub %ecx, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub %ecx, REM jbe L(more16byteseq) #endif - lea (%ecx,%edx), %ebx - movzbl (%edi,%ebx), %eax - movzbl (%esi,%ebx), %ecx + lea (%ecx,%edx), %ecx + movzbl (%edi,%ecx), %eax + movzbl (%esi,%ecx), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx +# endif +#endif subl %ecx, %eax L(ret): POP (%esi) POP (%edi) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + POP (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC POP (%ebx) -#ifdef USE_AS_STRNCMP - POP (%ebp) +# endif #endif ret .p2align 4 -#ifdef USE_AS_STRNCMP +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cfi_restore_state L(more16byteseq): POP (%esi) +# ifdef USE_AS_STRNCMP POP (%edi) - POP (%ebx) +# endif #endif L(eq): xorl %eax, %eax @@ -269,27 +543,45 @@ L(neq_bigger): L(less16bytes): add $0xfefefeff, %ecx jnc L(less4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movd %xmm3, %edi + xor %edi, %ecx +#else xor (%edx), %ecx +#endif or $0xfefefeff, %ecx add $1, %ecx jnz L(less4bytes) -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM jbe L(eq) #endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + psrldq $4, %xmm3 + psrldq $4, %xmm4 + movd %xmm3, %ecx + movd %xmm4, %edi + cmp %edi, %ecx + mov %ecx, %edi +#else mov 4(%edx), %ecx cmp 4(%eax), %ecx +#endif jne L(more4bytes) add $0xfefefeff, %ecx jnc L(more4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + xor %edi, %ecx +#else xor 4(%edx), %ecx +#endif or $0xfefefeff, %ecx add $1, %ecx jnz L(more4bytes) -#ifdef USE_AS_STRNCMP - sub $8, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $8, REM jbe L(eq) #endif @@ -298,80 +590,176 @@ L(less16bytes): L(less4bytes): movzbl (%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl (%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, (%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM je L(eq) #endif movzbl 1(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 1(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 1(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $2, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM je L(eq) #endif movzbl 2(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 2(%edx), %edi +# ifdef PIC + movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 2(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $3, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM je L(eq) #endif movzbl 3(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 3(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 3(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) L(more4bytes): -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM je L(eq) #endif movzbl 4(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 4(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 4(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $5, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM je L(eq) #endif movzbl 5(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 5(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, 
%edi +#else cmpb %cl, 5(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $6, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM je L(eq) #endif movzbl 6(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 6(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 6(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $7, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM je L(eq) #endif movzbl 7(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 7(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 7(%edx) +#endif jne L(neq) jmp L(eq) diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S index f8a2c7de83..137596d6cf 100644 --- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S +++ b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S @@ -123,7 +123,7 @@ ENTRY (__strcasecmp_ssse3) # endif testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) jne __strcasecmp_nonascii - jmp .Lascii + jmp L(ascii) END (__strcasecmp_ssse3) #endif @@ -145,7 +145,7 @@ ENTRY (__strncasecmp_ssse3) # endif testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) jne __strncasecmp_nonascii - jmp .Lascii + jmp L(ascii) END (__strncasecmp_ssse3) #endif @@ -165,7 +165,7 @@ ENTRY (STRCMP) call __i686.get_pc_thunk.bx addl $_GLOBAL_OFFSET_TABLE_, %ebx # endif -.Lascii: 
+L(ascii): .section .rodata.cst16,"aM",@progbits,16 .align 16 .Lbelowupper: diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S index 1838ea3ff5..28e2d6154c 100644 --- a/sysdeps/i386/i686/multiarch/strcmp.S +++ b/sysdeps/i386/i686/multiarch/strcmp.S @@ -76,12 +76,9 @@ ENTRY(STRCMP) testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax -#if 0 - // XXX Temporarily testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx) jz 2f leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax -#endif 2: popl %ebx cfi_adjust_cfa_offset (-4) cfi_restore (ebx) @@ -98,12 +95,9 @@ ENTRY(STRCMP) testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features jz 2f leal __STRCMP_SSSE3, %eax -#if 0 - // XXX Temporarily testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features jz 2f leal __STRCMP_SSE4_2, %eax -#endif 2: ret END(STRCMP) # endif diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S index 9b697d1bbc..d20532f993 100644 --- a/sysdeps/i386/i686/multiarch/strncase.S +++ b/sysdeps/i386/i686/multiarch/strncase.S @@ -36,12 +36,9 @@ ENTRY(__strncasecmp) testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __strncasecmp_ssse3@GOTOFF(%ebx), %eax -#if 0 - // XXX Temporarily testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx) jz 2f leal __strncasecmp_sse4_2@GOTOFF(%ebx), %eax -#endif 2: popl %ebx cfi_adjust_cfa_offset (-4) cfi_restore (ebx) diff --git a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/sysdeps/i386/i686/multiarch/strncase_l-sse4.S new file mode 100644 index 0000000000..557210832e --- /dev/null +++ b/sysdeps/i386/i686/multiarch/strncase_l-sse4.S @@ -0,0 +1,2 @@ +#define USE_AS_STRNCASECMP_L 1 +#include "strcmp-sse4.S"