This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/wcsrchr/sse2 created. glibc-2.25-374-g9b644f0
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 26 May 2017 12:26:20 -0000
- Subject: GNU C Library master sources branch hjl/wcsrchr/sse2 created. glibc-2.25-374-g9b644f0
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch hjl/wcsrchr/sse2 has been created
at 9b644f055edef77a3fe02cb582a7e904b539388f (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9b644f055edef77a3fe02cb582a7e904b539388f
commit 9b644f055edef77a3fe02cb582a7e904b539388f
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Thu May 25 20:58:44 2017 -0700
x86-64: Update strrchr.S to support wcsrchr
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index e6a33bc..11e5fbd 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -19,23 +19,40 @@
#include <sysdep.h>
+#ifdef USE_AS_WCSRCHR
+# define PCMPEQ pcmpeqd
+# define PMINU pminud
+#else
+# define PCMPEQ pcmpeqb
+# define PMINU pminub
+#endif
+
.text
ENTRY (strrchr)
movd %esi, %xmm1
movq %rdi, %rax
andl $4095, %eax
+#ifdef USE_AS_WCSRCHR
+ movabsq $0x1111111111111111, %r11
+ cmpq $4032, %rax
+#else
punpcklbw %xmm1, %xmm1
cmpq $4032, %rax
punpcklwd %xmm1, %xmm1
+#endif
pshufd $0, %xmm1, %xmm1
ja L(cross_page)
movdqu (%rdi), %xmm0
pxor %xmm2, %xmm2
movdqa %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
+ PCMPEQ %xmm1, %xmm0
+ PCMPEQ %xmm2, %xmm3
pmovmskb %xmm0, %ecx
pmovmskb %xmm3, %edx
+#ifdef USE_AS_WCSRCHR
+ andl %r11d, %ecx
+ andl %r11d, %edx
+#endif
testq %rdx, %rdx
je L(next_48_bytes)
leaq -1(%rdx), %rax
@@ -51,22 +68,25 @@ L(next_48_bytes):
movdqu 16(%rdi), %xmm4
movdqa %xmm4, %xmm5
movdqu 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm2, %xmm5
+ PCMPEQ %xmm1, %xmm4
+ PCMPEQ %xmm2, %xmm5
movdqu 48(%rdi), %xmm0
pmovmskb %xmm5, %edx
movdqa %xmm3, %xmm5
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm2, %xmm5
- pcmpeqb %xmm0, %xmm2
+ PCMPEQ %xmm1, %xmm3
+ PCMPEQ %xmm2, %xmm5
+ PCMPEQ %xmm0, %xmm2
salq $16, %rdx
pmovmskb %xmm3, %r8d
pmovmskb %xmm5, %eax
pmovmskb %xmm2, %esi
salq $32, %r8
salq $32, %rax
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
orq %rdx, %rax
+#ifdef USE_AS_WCSRCHR
+ andq %r11, %rax
+#endif
movq %rsi, %rdx
pmovmskb %xmm4, %esi
salq $48, %rdx
@@ -76,6 +96,9 @@ L(next_48_bytes):
pmovmskb %xmm0, %ecx
salq $48, %rcx
orq %rcx, %rsi
+#ifdef USE_AS_WCSRCHR
+ andq %r11, %rsi
+#endif
orq %rdx, %rax
je L(loop_header2)
leaq -1(%rax), %rcx
@@ -109,38 +132,41 @@ L(loop_entry):
movdqa 48(%rdi), %xmm2
movdqa %xmm3, %xmm0
movdqa 16(%rdi), %xmm4
- pminub %xmm2, %xmm0
+ PMINU %xmm2, %xmm0
movdqa (%rdi), %xmm5
- pminub %xmm4, %xmm0
- pminub %xmm5, %xmm0
- pcmpeqb %xmm7, %xmm0
+ PMINU %xmm4, %xmm0
+ PMINU %xmm5, %xmm0
+ PCMPEQ %xmm7, %xmm0
pmovmskb %xmm0, %eax
movdqa %xmm5, %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %r9d
movdqa %xmm4, %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %edx
movdqa %xmm3, %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
salq $16, %rdx
pmovmskb %xmm0, %r10d
movdqa %xmm2, %xmm0
- pcmpeqb %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm0
salq $32, %r10
orq %r10, %rdx
pmovmskb %xmm0, %r8d
orq %r9, %rdx
salq $48, %r8
orq %r8, %rdx
+#ifdef USE_AS_WCSRCHR
+ andq %r11, %rdx
+#endif
testl %eax, %eax
je L(loop64)
- pcmpeqb %xmm6, %xmm4
- pcmpeqb %xmm6, %xmm3
- pcmpeqb %xmm6, %xmm5
+ PCMPEQ %xmm6, %xmm4
+ PCMPEQ %xmm6, %xmm3
+ PCMPEQ %xmm6, %xmm5
pmovmskb %xmm4, %eax
pmovmskb %xmm3, %r10d
- pcmpeqb %xmm6, %xmm2
+ PCMPEQ %xmm6, %xmm2
pmovmskb %xmm5, %r9d
salq $32, %r10
salq $16, %rax
@@ -149,6 +175,9 @@ L(loop_entry):
orq %r9, %rax
salq $48, %r8
orq %r8, %rax
+#ifdef USE_AS_WCSRCHR
+ andq %r11, %rax
+#endif
leaq -1(%rax), %r8
xorq %rax, %r8
andq %r8, %rdx
@@ -177,26 +206,26 @@ L(cross_page):
movdqu (%rax), %xmm5
movdqa %xmm5, %xmm6
movdqu 16(%rax), %xmm4
- pcmpeqb %xmm1, %xmm5
- pcmpeqb %xmm0, %xmm6
+ PCMPEQ %xmm1, %xmm5
+ PCMPEQ %xmm0, %xmm6
movdqu 32(%rax), %xmm3
pmovmskb %xmm6, %esi
movdqa %xmm4, %xmm6
movdqu 48(%rax), %xmm2
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm0, %xmm6
+ PCMPEQ %xmm1, %xmm4
+ PCMPEQ %xmm0, %xmm6
pmovmskb %xmm6, %edx
movdqa %xmm3, %xmm6
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm0, %xmm6
- pcmpeqb %xmm2, %xmm0
+ PCMPEQ %xmm1, %xmm3
+ PCMPEQ %xmm0, %xmm6
+ PCMPEQ %xmm2, %xmm0
salq $16, %rdx
pmovmskb %xmm3, %r9d
pmovmskb %xmm6, %r8d
pmovmskb %xmm0, %ecx
salq $32, %r9
salq $32, %r8
- pcmpeqb %xmm1, %xmm2
+ PCMPEQ %xmm1, %xmm2
orq %r8, %rdx
salq $48, %rcx
pmovmskb %xmm5, %r8d
@@ -209,6 +238,10 @@ L(cross_page):
orq %r9, %rsi
orq %r8, %rsi
orq %rcx, %rsi
+#ifdef USE_AS_WCSRCHR
+ andq %r11, %rdx
+ andq %r11, %rsi
+#endif
movl %edi, %ecx
subl %eax, %ecx
shrq %cl, %rdx
@@ -224,5 +257,7 @@ L(cross_page):
ret
END (strrchr)
+#ifndef USE_AS_WCSRCHR
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)
+#endif
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index a6c385c..9bfd754 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -1,282 +1,4 @@
-/* wcsrchr with SSSE3
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
+#define USE_AS_WCSRCHR 1
+#define strrchr wcsrchr
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY (wcsrchr)
-
- movd %rsi, %xmm1
- mov %rdi, %rcx
- punpckldq %xmm1, %xmm1
- pxor %xmm2, %xmm2
- punpckldq %xmm1, %xmm1
- and $63, %rcx
- cmp $48, %rcx
- ja L(crosscache)
-
- movdqu (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match1)
-
- test %rcx, %rcx
- jnz L(return_null)
-
- and $-16, %rdi
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match1):
- test %rcx, %rcx
- jnz L(prolog_find_zero_1)
-
- mov %rax, %r8
- mov %rdi, %rsi
- and $-16, %rdi
- jmp L(loop)
-
- .p2align 4
-L(crosscache):
- and $15, %rcx
- and $-16, %rdi
- pxor %xmm3, %xmm3
- movdqa (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm3
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm3, %rdx
- pmovmskb %xmm0, %rax
- shr %cl, %rdx
- shr %cl, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match)
-
- test %rdx, %rdx
- jnz L(return_null)
-
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match):
- test %rdx, %rdx
- jnz L(prolog_find_zero)
-
- mov %rax, %r8
- lea (%rdi, %rcx), %rsi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm3
- pcmpeqd %xmm3, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm3
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm3, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm4
- pcmpeqd %xmm4, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm4
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm4, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm5
- pcmpeqd %xmm5, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm5
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm5, %rax
- or %rax, %rcx
- jz L(loop)
-
- .p2align 4
-L(matches):
- test %rax, %rax
- jnz L(match)
-L(return_value):
- test %r8, %r8
- jz L(return_null)
- mov %r8, %rax
- mov %rsi, %rdi
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %rcx
- test %rcx, %rcx
- jnz L(find_zero)
- mov %rax, %r8
- mov %rdi, %rsi
- jmp L(loop)
-
- .p2align 4
-L(find_zero):
- test $15, %cl
- jnz L(find_zero_in_first_wchar)
- test %cl, %cl
- jnz L(find_zero_in_second_wchar)
- test $15, %ch
- jnz L(find_zero_in_third_wchar)
-
- and $1 << 13 - 1, %rax
- jz L(return_value)
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_first_wchar):
- test $1, %rax
- jz L(return_value)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_second_wchar):
- and $1 << 5 - 1, %rax
- jz L(return_value)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_third_wchar):
- and $1 << 9 - 1, %rax
- jz L(return_value)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero):
- add %rcx, %rdi
- mov %rdx, %rcx
-L(prolog_find_zero_1):
- test $15, %cl
- jnz L(prolog_find_zero_in_first_wchar)
- test %cl, %cl
- jnz L(prolog_find_zero_in_second_wchar)
- test $15, %ch
- jnz L(prolog_find_zero_in_third_wchar)
-
- and $1 << 13 - 1, %rax
- jz L(return_null)
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_first_wchar):
- test $1, %rax
- jz L(return_null)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_second_wchar):
- and $1 << 5 - 1, %rax
- jz L(return_null)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_third_wchar):
- and $1 << 9 - 1, %rax
- jz L(return_null)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match_second_wchar):
- lea -12(%rdi), %rax
- ret
-
- .p2align 4
-L(match_third_wchar):
- lea -8(%rdi), %rax
- ret
-
- .p2align 4
-L(match_fourth_wchar):
- lea -4(%rdi), %rax
- ret
-
- .p2align 4
-L(return_null):
- xor %rax, %rax
- ret
-
-END (wcsrchr)
+#include "strrchr.S"
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources