]> sourceware.org Git - glibc.git/blob - sysdeps/i386/i686/multiarch/memcmp-sse4.S
Consistently use macros for x86 PIC thunks.
[glibc.git] / sysdeps / i386 / i686 / multiarch / memcmp-sse4.S
1 /* memcmp with SSE4.2, wmemcmp with SSE4.2
2 Copyright (C) 2010-2012 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #ifndef NOT_IN_libc
22
23 # include <sysdep.h>
24
25 # ifndef MEMCMP
26 # define MEMCMP __memcmp_sse4_2
27 # endif
28 /* CFI annotations that accompany a 4-byte push of REG: adjust the CFA offset by 4 and note REG's save slot at offset 0. */
29 # define CFI_PUSH(REG) \
30 cfi_adjust_cfa_offset (4); \
31 cfi_rel_offset (REG, 0)
32 /* CFI annotations that accompany a 4-byte pop of REG: undo the CFA adjustment and mark REG as restored. */
33 # define CFI_POP(REG) \
34 cfi_adjust_cfa_offset (-4); \
35 cfi_restore (REG)
36 /* PUSH/POP pair the real pushl/popl with the matching CFI annotation. */
37 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
38 # define POP(REG) popl REG; CFI_POP (REG)
39 /* Offsets from %esp, at function entry, of the three stack arguments. NOTE(review): PARMS is 4 presumably because the return address sits at 0(%esp) -- confirm against the i386 ABI. */
40 # define PARMS 4
41 # define BLK1 PARMS
42 # define BLK2 BLK1 + 4
43 # define LEN BLK2 + 4
44 # define RETURN POP (%ebx); ret; CFI_PUSH (%ebx) /* Restore EBX and return; the trailing CFI_PUSH re-declares EBX as pushed for the code that follows in the file. */
45
46
47 # ifdef SHARED
48 # define JMPTBL(I, B) I - B /* PIC: a table entry is the offset of label I from table base B. */
49
50 /* Load an entry in a jump table into EBX and branch to it. TABLE is a
51 jump table with relative offsets. INDEX is a register that contains the
52 index into the jump table. SCALE is the scale of INDEX. */
53
54 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
55 /* We first load PC into EBX. */ \
56 SETUP_PIC_REG(bx); \
57 /* Get the address of the jump table. */ \
58 addl $(TABLE - .), %ebx; \
59 /* Get the entry and convert the relative offset to the \
60 absolute address. */ \
61 addl (%ebx,INDEX,SCALE), %ebx; \
62 /* EBX now holds the absolute address of the target. Go. */ \
63 jmp *%ebx
64 # else
65 # define JMPTBL(I, B) I /* Non-PIC: a table entry is the address of label I itself. */
66
67 /* Branch through an entry in a jump table. TABLE is a jump table whose
68 entries are absolute addresses (see JMPTBL above). INDEX is a register that
69 contains the index into the jump table. SCALE is the scale of INDEX. */
70 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
71 jmp *TABLE(,INDEX,SCALE)
72 # endif
73
74
75 /* Warning!
76 wmemcmp has to use SIGNED comparison for elements.
77 memcmp has to use UNSIGNED comparison for elements.
78 */
79
80 .section .text.sse4.2,"ax",@progbits
81 ENTRY (MEMCMP) /* Entry point: loads the three stack arguments and dispatches on the length. */
82 movl BLK1(%esp), %eax /* eax = first block. */
83 movl BLK2(%esp), %edx /* edx = second block. */
84 movl LEN(%esp), %ecx /* ecx = length argument. */
85
86 # ifdef USE_AS_WMEMCMP
87 shl $2, %ecx /* ecx *= 4; presumably converts a count of 4-byte elements into bytes. */
88 test %ecx, %ecx
89 jz L(return0) /* Nothing to compare. */
90 # else
91 cmp $1, %ecx
92 jbe L(less1bytes) /* Lengths 0 and 1; L(less1bytes) re-uses the flags of this cmp. */
93 # endif
94
95 pxor %xmm0, %xmm0 /* xmm0 = 0, the second operand of every ptest below. */
96 cmp $64, %ecx
97 ja L(64bytesormore)
98 cmp $8, %ecx /* Flags are consumed by the jb below; pushl does not modify them. */
99
100 # ifndef USE_AS_WMEMCMP
101 PUSH (%ebx) /* memcmp: L(less8bytes) uses BL, so EBX is saved before branching. */
102 jb L(less8bytes)
103 # else
104 jb L(less8bytes) /* wmemcmp: L(less8bytes) does not use EBX, so branch before saving it. */
105 PUSH (%ebx)
106 # endif
107
108 add %ecx, %edx /* Point both registers just past the end; the table targets use negative offsets. */
109 add %ecx, %eax
110 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4) /* Dispatch on the byte count, 8..64 here. */
111
112 # ifndef USE_AS_WMEMCMP
113 .p2align 4
114 L(less8bytes): /* memcmp with 2 <= ecx <= 7 and EBX already pushed: compare one byte at a time from the start. */
115 mov (%eax), %bl
116 cmpb (%edx), %bl
117 jne L(nonzero)
118
119 mov 1(%eax), %bl
120 cmpb 1(%edx), %bl
121 jne L(nonzero)
122
123 cmp $2, %ecx /* Stop as soon as the requested length has been compared. */
124 jz L(0bytes)
125
126 mov 2(%eax), %bl
127 cmpb 2(%edx), %bl
128 jne L(nonzero)
129
130 cmp $3, %ecx
131 jz L(0bytes)
132
133 mov 3(%eax), %bl
134 cmpb 3(%edx), %bl
135 jne L(nonzero)
136
137 cmp $4, %ecx
138 jz L(0bytes)
139
140 mov 4(%eax), %bl
141 cmpb 4(%edx), %bl
142 jne L(nonzero)
143
144 cmp $5, %ecx
145 jz L(0bytes)
146
147 mov 5(%eax), %bl
148 cmpb 5(%edx), %bl
149 jne L(nonzero)
150
151 cmp $6, %ecx
152 jz L(0bytes)
153
154 mov 6(%eax), %bl
155 cmpb 6(%edx), %bl
156 je L(0bytes) /* Byte 6 is the last one possible (length 7); a mismatch falls into L(nonzero). */
157
158 L(nonzero): /* Flags come from the cmpb that found the mismatch; popl and mov do not modify them. */
159 POP (%ebx)
160 mov $1, %eax
161 ja L(above) /* Unsigned: the byte of the first block is larger -> return 1. */
162 neg %eax /* Otherwise return -1. */
163 L(above):
164 ret
165 CFI_PUSH (%ebx) /* Emits no instruction: re-declares EBX as pushed for the code that follows. */
166 # endif
167
168 .p2align 4
169 L(0bytes): /* Blocks compared equal; reached with EBX pushed (e.g. via jump-table entry 0). */
170 POP (%ebx)
171 xor %eax, %eax /* Return 0. */
172 ret
173
174 # ifdef USE_AS_WMEMCMP
175
176 /* for wmemcmp, case N == 1 */
177
178 .p2align 4
179 L(less8bytes): /* Compare the single 32-bit element; EBX has not been pushed on this path. */
180 mov (%eax), %ecx
181 cmp (%edx), %ecx
182 je L(return0)
183 mov $1, %eax /* mov does not modify the flags of the cmp above. */
184 jg L(find_diff_bigger) /* Signed: element of the first block is greater -> return 1. */
185 neg %eax /* Otherwise return -1. */
186 ret
187
188 .p2align 4
189 L(find_diff_bigger):
190 ret
191
192 .p2align 4
193 L(return0): /* Return 0 without touching the stack. */
194 xor %eax, %eax
195 ret
196 # endif
197
198 # ifndef USE_AS_WMEMCMP
199 .p2align 4
200 L(less1bytes): /* Length 0 or 1; the flags still hold the result of "cmp $1, %ecx" at entry. EBX is not pushed. */
201 jb L(0bytesend) /* Length 0. */
202 movzbl (%eax), %eax
203 movzbl (%edx), %edx
204 sub %edx, %eax /* Length 1: return the difference of the two zero-extended bytes. */
205 ret
206
207 .p2align 4
208 L(0bytesend): /* Return 0. */
209 xor %eax, %eax
210 ret
211 # endif
212 .p2align 4
213 L(64bytesormore): /* More than 64 bytes: compare 64 bytes per iteration as four 16-byte chunks. */
214 PUSH (%ebx)
215 mov %ecx, %ebx
216 mov $64, %ecx /* ecx = 64, the per-iteration stride. */
217 sub $64, %ebx /* ebx = bytes that remain after the 64 bytes about to be compared. */
218 L(64bytesormore_loop):
219 movdqu (%eax), %xmm1
220 movdqu (%edx), %xmm2
221 pxor %xmm1, %xmm2 /* xmm2 = XOR of the two chunks; nonzero iff they differ. */
222 ptest %xmm2, %xmm0 /* xmm0 is zero, so CF is set exactly when xmm2 is all zero. */
223 jnc L(find_16diff) /* CF clear: a difference lies in bytes 0..15. */
224
225 movdqu 16(%eax), %xmm1
226 movdqu 16(%edx), %xmm2
227 pxor %xmm1, %xmm2
228 ptest %xmm2, %xmm0
229 jnc L(find_32diff)
230
231 movdqu 32(%eax), %xmm1
232 movdqu 32(%edx), %xmm2
233 pxor %xmm1, %xmm2
234 ptest %xmm2, %xmm0
235 jnc L(find_48diff)
236
237 movdqu 48(%eax), %xmm1
238 movdqu 48(%edx), %xmm2
239 pxor %xmm1, %xmm2
240 ptest %xmm2, %xmm0
241 jnc L(find_64diff)
242 add %ecx, %eax
243 add %ecx, %edx
244 sub %ecx, %ebx
245 jae L(64bytesormore_loop) /* No borrow: at least 64 bytes were still left, go round again. */
246 add %ebx, %ecx /* ebx is now in -64..-1, so ecx = remaining byte count, 0..63. */
247 add %ecx, %edx /* Point both registers just past the end for the tail dispatch. */
248 add %ecx, %eax
249 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
250
251 # ifdef USE_AS_WMEMCMP
252
253 /* This label is needed only to fill the unused slots of table_64bytes. */
254 L(unreal_case):
255 /* no code here */
256
257 # endif
258 .p2align 4
259 L(find_16diff): /* Entered from the 64-byte loop with ecx = 64; the stacked subtractions leave ecx = 16, 32, 48 or 64. */
260 sub $16, %ecx
261 L(find_32diff):
262 sub $16, %ecx
263 L(find_48diff):
264 sub $16, %ecx
265 L(find_64diff):
266 add %ecx, %edx /* Point just past the 16-byte chunk that differs, then fall into L(16bytes). */
267 add %ecx, %eax
268
269 # ifndef USE_AS_WMEMCMP
270 .p2align 4
271 L(16bytes): /* Compare the 16 bytes before eax/edx as four dwords; 12/8/4bytes are also jump-table targets. */
272 mov -16(%eax), %ecx
273 mov -16(%edx), %ebx /* memcmp keeps both dwords in registers because L(find_diff) inspects ECX and EBX. */
274 cmp %ebx, %ecx
275 jne L(find_diff)
276 L(12bytes):
277 mov -12(%eax), %ecx
278 mov -12(%edx), %ebx
279 cmp %ebx, %ecx
280 jne L(find_diff)
281 L(8bytes):
282 mov -8(%eax), %ecx
283 mov -8(%edx), %ebx
284 cmp %ebx, %ecx
285 jne L(find_diff)
286 L(4bytes):
287 mov -4(%eax), %ecx
288 mov -4(%edx), %ebx
289 cmp %ebx, %ecx
290 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
291 jne L(find_diff)
292 RETURN
293 # else
294 .p2align 4
295 L(16bytes): /* wmemcmp: compare the four elements before eax/edx; L(find_diff) only needs the flags of the cmp. */
296 mov -16(%eax), %ecx
297 cmp -16(%edx), %ecx
298 jne L(find_diff)
299 L(12bytes):
300 mov -12(%eax), %ecx
301 cmp -12(%edx), %ecx
302 jne L(find_diff)
303 L(8bytes):
304 mov -8(%eax), %ecx
305 cmp -8(%edx), %ecx
306 jne L(find_diff)
307 L(4bytes):
308 mov -4(%eax), %ecx
309 cmp -4(%edx), %ecx
310 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
311 jne L(find_diff)
312 RETURN
313 # endif
314
315 # ifndef USE_AS_WMEMCMP
316 .p2align 4
317 L(49bytes): /* 49 bytes end at eax/edx; falls through the 33/17/13/9/5-byte entry points. */
318 movdqu -49(%eax), %xmm1
319 movdqu -49(%edx), %xmm2
320 mov $-49, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
321 pxor %xmm1, %xmm2
322 ptest %xmm2, %xmm0
323 jnc L(less16bytes) /* CF clear: the two chunks differ. */
324 L(33bytes):
325 movdqu -33(%eax), %xmm1
326 movdqu -33(%edx), %xmm2
327 mov $-33, %ebx
328 pxor %xmm1, %xmm2
329 ptest %xmm2, %xmm0
330 jnc L(less16bytes)
331 L(17bytes): /* From here on the data is compared dword by dword. */
332 mov -17(%eax), %ecx
333 mov -17(%edx), %ebx
334 cmp %ebx, %ecx
335 jne L(find_diff)
336 L(13bytes):
337 mov -13(%eax), %ecx
338 mov -13(%edx), %ebx
339 cmp %ebx, %ecx
340 jne L(find_diff)
341 L(9bytes):
342 mov -9(%eax), %ecx
343 mov -9(%edx), %ebx
344 cmp %ebx, %ecx
345 jne L(find_diff)
346 L(5bytes):
347 mov -5(%eax), %ecx
348 mov -5(%edx), %ebx
349 cmp %ebx, %ecx
350 jne L(find_diff)
351 movzbl -1(%eax), %ecx /* Last byte. */
352 cmp -1(%edx), %cl
353 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
354 jne L(end)
355 RETURN
356
357 .p2align 4
358 L(50bytes): /* 50 bytes end at eax/edx; falls through the 34/18/14/10/6/2-byte entry points. */
359 mov $-50, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
360 movdqu -50(%eax), %xmm1
361 movdqu -50(%edx), %xmm2
362 pxor %xmm1, %xmm2
363 ptest %xmm2, %xmm0
364 jnc L(less16bytes)
365 L(34bytes):
366 mov $-34, %ebx
367 movdqu -34(%eax), %xmm1
368 movdqu -34(%edx), %xmm2
369 pxor %xmm1, %xmm2
370 ptest %xmm2, %xmm0
371 jnc L(less16bytes)
372 L(18bytes):
373 mov -18(%eax), %ecx
374 mov -18(%edx), %ebx
375 cmp %ebx, %ecx
376 jne L(find_diff)
377 L(14bytes):
378 mov -14(%eax), %ecx
379 mov -14(%edx), %ebx
380 cmp %ebx, %ecx
381 jne L(find_diff)
382 L(10bytes):
383 mov -10(%eax), %ecx
384 mov -10(%edx), %ebx
385 cmp %ebx, %ecx
386 jne L(find_diff)
387 L(6bytes):
388 mov -6(%eax), %ecx
389 mov -6(%edx), %ebx
390 cmp %ebx, %ecx
391 jne L(find_diff)
392 L(2bytes): /* Last two bytes: lower address (low byte) first, then the higher one. */
393 movzwl -2(%eax), %ecx
394 movzwl -2(%edx), %ebx
395 cmp %bl, %cl
396 jne L(end)
397 cmp %bh, %ch
398 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
399 jne L(end)
400 RETURN
401
402 .p2align 4
403 L(51bytes): /* 51 bytes end at eax/edx; falls through the 35/19/15/11/7/3/1-byte entry points. */
404 mov $-51, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
405 movdqu -51(%eax), %xmm1
406 movdqu -51(%edx), %xmm2
407 pxor %xmm1, %xmm2
408 ptest %xmm2, %xmm0
409 jnc L(less16bytes)
410 L(35bytes):
411 mov $-35, %ebx
412 movdqu -35(%eax), %xmm1
413 movdqu -35(%edx), %xmm2
414 pxor %xmm1, %xmm2
415 ptest %xmm2, %xmm0
416 jnc L(less16bytes)
417 L(19bytes):
418 movl -19(%eax), %ecx
419 movl -19(%edx), %ebx
420 cmp %ebx, %ecx
421 jne L(find_diff)
422 L(15bytes):
423 movl -15(%eax), %ecx
424 movl -15(%edx), %ebx
425 cmp %ebx, %ecx
426 jne L(find_diff)
427 L(11bytes):
428 movl -11(%eax), %ecx
429 movl -11(%edx), %ebx
430 cmp %ebx, %ecx
431 jne L(find_diff)
432 L(7bytes):
433 movl -7(%eax), %ecx
434 movl -7(%edx), %ebx
435 cmp %ebx, %ecx
436 jne L(find_diff)
437 L(3bytes): /* Bytes -3 and -2: low byte first, then the whole word (low bytes equal, so the high byte decides). */
438 movzwl -3(%eax), %ecx
439 movzwl -3(%edx), %ebx
440 cmpb %bl, %cl
441 jne L(end)
442 cmp %bx, %cx
443 jne L(end)
444 L(1bytes): /* Last byte. */
445 movzbl -1(%eax), %eax
446 cmpb -1(%edx), %al
447 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
448 jne L(end)
449 RETURN
450 # endif
451 .p2align 4
452 L(52bytes): /* 52 bytes end at eax/edx; falls through the 36/20-byte entry points. Built for memcmp and wmemcmp. */
453 movdqu -52(%eax), %xmm1
454 movdqu -52(%edx), %xmm2
455 mov $-52, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
456 pxor %xmm1, %xmm2
457 ptest %xmm2, %xmm0
458 jnc L(less16bytes)
459 L(36bytes):
460 movdqu -36(%eax), %xmm1
461 movdqu -36(%edx), %xmm2
462 mov $-36, %ebx
463 pxor %xmm1, %xmm2
464 ptest %xmm2, %xmm0
465 jnc L(less16bytes)
466 L(20bytes):
467 movdqu -20(%eax), %xmm1
468 movdqu -20(%edx), %xmm2
469 mov $-20, %ebx
470 pxor %xmm1, %xmm2
471 ptest %xmm2, %xmm0
472 jnc L(less16bytes)
473 mov -4(%eax), %ecx /* Last dword. */
474 # ifndef USE_AS_WMEMCMP
475 mov -4(%edx), %ebx /* memcmp: L(find_diff) needs both dwords in ECX/EBX. */
476 cmp %ebx, %ecx
477 # else
478 cmp -4(%edx), %ecx /* wmemcmp: L(find_diff) only needs the flags. */
479 # endif
480 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
481 jne L(find_diff)
482 RETURN
483
484 # ifndef USE_AS_WMEMCMP
485 .p2align 4
486 L(53bytes): /* 53 bytes end at eax/edx; falls through the 37/21-byte entry points. */
487 movdqu -53(%eax), %xmm1
488 movdqu -53(%edx), %xmm2
489 mov $-53, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
490 pxor %xmm1, %xmm2
491 ptest %xmm2, %xmm0
492 jnc L(less16bytes)
493 L(37bytes):
494 mov $-37, %ebx
495 movdqu -37(%eax), %xmm1
496 movdqu -37(%edx), %xmm2
497 pxor %xmm1, %xmm2
498 ptest %xmm2, %xmm0
499 jnc L(less16bytes)
500 L(21bytes):
501 mov $-21, %ebx
502 movdqu -21(%eax), %xmm1
503 movdqu -21(%edx), %xmm2
504 pxor %xmm1, %xmm2
505 ptest %xmm2, %xmm0
506 jnc L(less16bytes)
507 mov -5(%eax), %ecx /* Remaining 5 bytes: one dword, then one byte. */
508 mov -5(%edx), %ebx
509 cmp %ebx, %ecx
510 jne L(find_diff)
511 movzbl -1(%eax), %ecx
512 cmp -1(%edx), %cl
513 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
514 jne L(end)
515 RETURN
516
517 .p2align 4
518 L(54bytes): /* 54 bytes end at eax/edx; falls through the 38/22-byte entry points. */
519 movdqu -54(%eax), %xmm1
520 movdqu -54(%edx), %xmm2
521 mov $-54, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
522 pxor %xmm1, %xmm2
523 ptest %xmm2, %xmm0
524 jnc L(less16bytes)
525 L(38bytes):
526 mov $-38, %ebx
527 movdqu -38(%eax), %xmm1
528 movdqu -38(%edx), %xmm2
529 pxor %xmm1, %xmm2
530 ptest %xmm2, %xmm0
531 jnc L(less16bytes)
532 L(22bytes):
533 mov $-22, %ebx
534 movdqu -22(%eax), %xmm1
535 movdqu -22(%edx), %xmm2
536 pxor %xmm1, %xmm2
537 ptest %xmm2, %xmm0
538 jnc L(less16bytes)
539 /* Remaining 6 bytes: one dword, then two single bytes. */
540 mov -6(%eax), %ecx
541 mov -6(%edx), %ebx
542 cmp %ebx, %ecx
543 jne L(find_diff)
544 movzwl -2(%eax), %ecx
545 movzwl -2(%edx), %ebx
546 cmp %bl, %cl
547 jne L(end)
548 cmp %bh, %ch
549 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
550 jne L(end)
551 RETURN
552
553 .p2align 4
554 L(55bytes): /* 55 bytes end at eax/edx; falls through the 39/23-byte entry points. */
555 movdqu -55(%eax), %xmm1
556 movdqu -55(%edx), %xmm2
557 mov $-55, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
558 pxor %xmm1, %xmm2
559 ptest %xmm2, %xmm0
560 jnc L(less16bytes)
561 L(39bytes):
562 mov $-39, %ebx
563 movdqu -39(%eax), %xmm1
564 movdqu -39(%edx), %xmm2
565 pxor %xmm1, %xmm2
566 ptest %xmm2, %xmm0
567 jnc L(less16bytes)
568 L(23bytes):
569 mov $-23, %ebx
570 movdqu -23(%eax), %xmm1
571 movdqu -23(%edx), %xmm2
572 pxor %xmm1, %xmm2
573 ptest %xmm2, %xmm0
574 jnc L(less16bytes)
575 movl -7(%eax), %ecx /* Remaining 7 bytes: one dword, one word (checked low byte first), one byte. */
576 movl -7(%edx), %ebx
577 cmp %ebx, %ecx
578 jne L(find_diff)
579 movzwl -3(%eax), %ecx
580 movzwl -3(%edx), %ebx
581 cmpb %bl, %cl
582 jne L(end)
583 cmp %bx, %cx
584 jne L(end)
585 movzbl -1(%eax), %eax
586 cmpb -1(%edx), %al
587 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
588 jne L(end)
589 RETURN
590 # endif
591 .p2align 4
592 L(56bytes): /* 56 bytes end at eax/edx; falls through the 40/24-byte entry points. Built for memcmp and wmemcmp. */
593 movdqu -56(%eax), %xmm1
594 movdqu -56(%edx), %xmm2
595 mov $-56, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
596 pxor %xmm1, %xmm2
597 ptest %xmm2, %xmm0
598 jnc L(less16bytes)
599 L(40bytes):
600 mov $-40, %ebx
601 movdqu -40(%eax), %xmm1
602 movdqu -40(%edx), %xmm2
603 pxor %xmm1, %xmm2
604 ptest %xmm2, %xmm0
605 jnc L(less16bytes)
606 L(24bytes):
607 mov $-24, %ebx
608 movdqu -24(%eax), %xmm1
609 movdqu -24(%edx), %xmm2
610 pxor %xmm1, %xmm2
611 ptest %xmm2, %xmm0
612 jnc L(less16bytes)
613 /* Remaining 8 bytes: two dwords. */
614 mov -8(%eax), %ecx
615 # ifndef USE_AS_WMEMCMP
616 mov -8(%edx), %ebx /* memcmp: L(find_diff) needs both dwords in ECX/EBX. */
617 cmp %ebx, %ecx
618 # else
619 cmp -8(%edx), %ecx /* wmemcmp: L(find_diff) only needs the flags. */
620 # endif
621 jne L(find_diff)
622
623 mov -4(%eax), %ecx
624 # ifndef USE_AS_WMEMCMP
625 mov -4(%edx), %ebx
626 cmp %ebx, %ecx
627 # else
628 cmp -4(%edx), %ecx
629 # endif
630 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
631 jne L(find_diff)
632 RETURN
633
634 # ifndef USE_AS_WMEMCMP
635 .p2align 4
636 L(57bytes): /* 57 bytes end at eax/edx; falls through the 41/25-byte entry points. */
637 movdqu -57(%eax), %xmm1
638 movdqu -57(%edx), %xmm2
639 mov $-57, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
640 pxor %xmm1, %xmm2
641 ptest %xmm2, %xmm0
642 jnc L(less16bytes)
643 L(41bytes):
644 mov $-41, %ebx
645 movdqu -41(%eax), %xmm1
646 movdqu -41(%edx), %xmm2
647 pxor %xmm1, %xmm2
648 ptest %xmm2, %xmm0
649 jnc L(less16bytes)
650 L(25bytes):
651 mov $-25, %ebx
652 movdqu -25(%eax), %xmm1
653 movdqu -25(%edx), %xmm2
654 pxor %xmm1, %xmm2
655 ptest %xmm2, %xmm0
656 jnc L(less16bytes)
657 mov -9(%eax), %ecx /* Remaining 9 bytes: two dwords, then one byte. */
658 mov -9(%edx), %ebx
659 cmp %ebx, %ecx
660 jne L(find_diff)
661 mov -5(%eax), %ecx
662 mov -5(%edx), %ebx
663 cmp %ebx, %ecx
664 jne L(find_diff)
665 movzbl -1(%eax), %ecx
666 cmp -1(%edx), %cl
667 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
668 jne L(end)
669 RETURN
670
671 .p2align 4
672 L(58bytes): /* 58 bytes end at eax/edx; falls through the 42/26-byte entry points. */
673 movdqu -58(%eax), %xmm1
674 movdqu -58(%edx), %xmm2
675 mov $-58, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
676 pxor %xmm1, %xmm2
677 ptest %xmm2, %xmm0
678 jnc L(less16bytes)
679 L(42bytes):
680 mov $-42, %ebx
681 movdqu -42(%eax), %xmm1
682 movdqu -42(%edx), %xmm2
683 pxor %xmm1, %xmm2
684 ptest %xmm2, %xmm0
685 jnc L(less16bytes)
686 L(26bytes):
687 mov $-26, %ebx
688 movdqu -26(%eax), %xmm1
689 movdqu -26(%edx), %xmm2
690 pxor %xmm1, %xmm2
691 ptest %xmm2, %xmm0
692 jnc L(less16bytes)
693 /* Remaining 10 bytes: two dwords, then two single bytes. */
694 mov -10(%eax), %ecx
695 mov -10(%edx), %ebx
696 cmp %ebx, %ecx
697 jne L(find_diff)
698
699 mov -6(%eax), %ecx
700 mov -6(%edx), %ebx
701 cmp %ebx, %ecx
702 jne L(find_diff)
703
704 movzwl -2(%eax), %ecx
705 movzwl -2(%edx), %ebx
706 cmp %bl, %cl
707 jne L(end)
708 cmp %bh, %ch
709 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
710 jne L(end)
711 RETURN
712
713 .p2align 4
714 L(59bytes): /* 59 bytes end at eax/edx; falls through the 43/27-byte entry points. */
715 movdqu -59(%eax), %xmm1
716 movdqu -59(%edx), %xmm2
717 mov $-59, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
718 pxor %xmm1, %xmm2
719 ptest %xmm2, %xmm0
720 jnc L(less16bytes)
721 L(43bytes):
722 mov $-43, %ebx
723 movdqu -43(%eax), %xmm1
724 movdqu -43(%edx), %xmm2
725 pxor %xmm1, %xmm2
726 ptest %xmm2, %xmm0
727 jnc L(less16bytes)
728 L(27bytes):
729 mov $-27, %ebx
730 movdqu -27(%eax), %xmm1
731 movdqu -27(%edx), %xmm2
732 pxor %xmm1, %xmm2
733 ptest %xmm2, %xmm0
734 jnc L(less16bytes)
735 movl -11(%eax), %ecx /* Remaining 11 bytes: two dwords, one word (checked low byte first), one byte. */
736 movl -11(%edx), %ebx
737 cmp %ebx, %ecx
738 jne L(find_diff)
739 movl -7(%eax), %ecx
740 movl -7(%edx), %ebx
741 cmp %ebx, %ecx
742 jne L(find_diff)
743 movzwl -3(%eax), %ecx
744 movzwl -3(%edx), %ebx
745 cmpb %bl, %cl
746 jne L(end)
747 cmp %bx, %cx
748 jne L(end)
749 movzbl -1(%eax), %eax
750 cmpb -1(%edx), %al
751 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
752 jne L(end)
753 RETURN
754 # endif
755 .p2align 4
756 L(60bytes): /* 60 bytes end at eax/edx; falls through the 44/28-byte entry points. Built for memcmp and wmemcmp. */
757 movdqu -60(%eax), %xmm1
758 movdqu -60(%edx), %xmm2
759 mov $-60, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
760 pxor %xmm1, %xmm2
761 ptest %xmm2, %xmm0
762 jnc L(less16bytes)
763 L(44bytes):
764 mov $-44, %ebx
765 movdqu -44(%eax), %xmm1
766 movdqu -44(%edx), %xmm2
767 pxor %xmm1, %xmm2
768 ptest %xmm2, %xmm0
769 jnc L(less16bytes)
770 L(28bytes):
771 mov $-28, %ebx
772 movdqu -28(%eax), %xmm1
773 movdqu -28(%edx), %xmm2
774 pxor %xmm1, %xmm2
775 ptest %xmm2, %xmm0
776 jnc L(less16bytes)
777 /* Remaining 12 bytes: three dwords. */
778 mov -12(%eax), %ecx
779 # ifndef USE_AS_WMEMCMP
780 mov -12(%edx), %ebx /* memcmp: L(find_diff) needs both dwords in ECX/EBX. */
781 cmp %ebx, %ecx
782 # else
783 cmp -12(%edx), %ecx /* wmemcmp: L(find_diff) only needs the flags. */
784 # endif
785 jne L(find_diff)
786
787 mov -8(%eax), %ecx
788 # ifndef USE_AS_WMEMCMP
789 mov -8(%edx), %ebx
790 cmp %ebx, %ecx
791 # else
792 cmp -8(%edx), %ecx
793 # endif
794 jne L(find_diff)
795
796 mov -4(%eax), %ecx
797 # ifndef USE_AS_WMEMCMP
798 mov -4(%edx), %ebx
799 cmp %ebx, %ecx
800 # else
801 cmp -4(%edx), %ecx
802 # endif
803 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
804 jne L(find_diff)
805 RETURN
806
807 # ifndef USE_AS_WMEMCMP
808 .p2align 4
809 L(61bytes): /* 61 bytes end at eax/edx; falls through the 45/29-byte entry points. */
810 movdqu -61(%eax), %xmm1
811 movdqu -61(%edx), %xmm2
812 mov $-61, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
813 pxor %xmm1, %xmm2
814 ptest %xmm2, %xmm0
815 jnc L(less16bytes)
816 L(45bytes):
817 mov $-45, %ebx
818 movdqu -45(%eax), %xmm1
819 movdqu -45(%edx), %xmm2
820 pxor %xmm1, %xmm2
821 ptest %xmm2, %xmm0
822 jnc L(less16bytes)
823 L(29bytes):
824 mov $-29, %ebx
825 movdqu -29(%eax), %xmm1
826 movdqu -29(%edx), %xmm2
827 pxor %xmm1, %xmm2
828 ptest %xmm2, %xmm0
829 jnc L(less16bytes)
830 /* Remaining 13 bytes: three dwords, then one byte. */
831 mov -13(%eax), %ecx
832 mov -13(%edx), %ebx
833 cmp %ebx, %ecx
834 jne L(find_diff)
835
836 mov -9(%eax), %ecx
837 mov -9(%edx), %ebx
838 cmp %ebx, %ecx
839 jne L(find_diff)
840
841 mov -5(%eax), %ecx
842 mov -5(%edx), %ebx
843 cmp %ebx, %ecx
844 jne L(find_diff)
845 movzbl -1(%eax), %ecx
846 cmp -1(%edx), %cl
847 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
848 jne L(end)
849 RETURN
850
851 .p2align 4
852 L(62bytes): /* 62 bytes end at eax/edx; falls through the 46/30-byte entry points. */
853 movdqu -62(%eax), %xmm1
854 movdqu -62(%edx), %xmm2
855 mov $-62, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
856 pxor %xmm1, %xmm2
857 ptest %xmm2, %xmm0
858 jnc L(less16bytes)
859 L(46bytes):
860 mov $-46, %ebx
861 movdqu -46(%eax), %xmm1
862 movdqu -46(%edx), %xmm2
863 pxor %xmm1, %xmm2
864 ptest %xmm2, %xmm0
865 jnc L(less16bytes)
866 L(30bytes):
867 mov $-30, %ebx
868 movdqu -30(%eax), %xmm1
869 movdqu -30(%edx), %xmm2
870 pxor %xmm1, %xmm2
871 ptest %xmm2, %xmm0
872 jnc L(less16bytes)
873 mov -14(%eax), %ecx /* Remaining 14 bytes: three dwords, then two single bytes. */
874 mov -14(%edx), %ebx
875 cmp %ebx, %ecx
876 jne L(find_diff)
877 mov -10(%eax), %ecx
878 mov -10(%edx), %ebx
879 cmp %ebx, %ecx
880 jne L(find_diff)
881 mov -6(%eax), %ecx
882 mov -6(%edx), %ebx
883 cmp %ebx, %ecx
884 jne L(find_diff)
885 movzwl -2(%eax), %ecx
886 movzwl -2(%edx), %ebx
887 cmp %bl, %cl
888 jne L(end)
889 cmp %bh, %ch
890 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
891 jne L(end)
892 RETURN
893
894 .p2align 4
895 L(63bytes): /* 63 bytes end at eax/edx; falls through the 47/31-byte entry points. */
896 movdqu -63(%eax), %xmm1
897 movdqu -63(%edx), %xmm2
898 mov $-63, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
899 pxor %xmm1, %xmm2
900 ptest %xmm2, %xmm0
901 jnc L(less16bytes)
902 L(47bytes):
903 mov $-47, %ebx
904 movdqu -47(%eax), %xmm1
905 movdqu -47(%edx), %xmm2
906 pxor %xmm1, %xmm2
907 ptest %xmm2, %xmm0
908 jnc L(less16bytes)
909 L(31bytes):
910 mov $-31, %ebx
911 movdqu -31(%eax), %xmm1
912 movdqu -31(%edx), %xmm2
913 pxor %xmm1, %xmm2
914 ptest %xmm2, %xmm0
915 jnc L(less16bytes)
916 /* Remaining 15 bytes: three dwords, one word (checked low byte first), one byte. */
917 movl -15(%eax), %ecx
918 movl -15(%edx), %ebx
919 cmp %ebx, %ecx
920 jne L(find_diff)
921 movl -11(%eax), %ecx
922 movl -11(%edx), %ebx
923 cmp %ebx, %ecx
924 jne L(find_diff)
925 movl -7(%eax), %ecx
926 movl -7(%edx), %ebx
927 cmp %ebx, %ecx
928 jne L(find_diff)
929 movzwl -3(%eax), %ecx
930 movzwl -3(%edx), %ebx
931 cmpb %bl, %cl
932 jne L(end)
933 cmp %bx, %cx
934 jne L(end)
935 movzbl -1(%eax), %eax
936 cmpb -1(%edx), %al
937 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
938 jne L(end)
939 RETURN
940 # endif
941
942 .p2align 4
943 L(64bytes): /* 64 bytes end at eax/edx; falls through the 48/32-byte entry points. Built for memcmp and wmemcmp. */
944 movdqu -64(%eax), %xmm1
945 movdqu -64(%edx), %xmm2
946 mov $-64, %ebx /* ebx = offset of this 16-byte chunk, consumed by L(less16bytes). */
947 pxor %xmm1, %xmm2
948 ptest %xmm2, %xmm0
949 jnc L(less16bytes)
950 L(48bytes):
951 movdqu -48(%eax), %xmm1
952 movdqu -48(%edx), %xmm2
953 mov $-48, %ebx
954 pxor %xmm1, %xmm2
955 ptest %xmm2, %xmm0
956 jnc L(less16bytes)
957 L(32bytes):
958 movdqu -32(%eax), %xmm1
959 movdqu -32(%edx), %xmm2
960 mov $-32, %ebx
961 pxor %xmm1, %xmm2
962 ptest %xmm2, %xmm0
963 jnc L(less16bytes)
964 /* Remaining 16 bytes: four dwords. */
965 mov -16(%eax), %ecx
966 # ifndef USE_AS_WMEMCMP
967 mov -16(%edx), %ebx /* memcmp: L(find_diff) needs both dwords in ECX/EBX. */
968 cmp %ebx, %ecx
969 # else
970 cmp -16(%edx), %ecx /* wmemcmp: L(find_diff) only needs the flags. */
971 # endif
972 jne L(find_diff)
973
974 mov -12(%eax), %ecx
975 # ifndef USE_AS_WMEMCMP
976 mov -12(%edx), %ebx
977 cmp %ebx, %ecx
978 # else
979 cmp -12(%edx), %ecx
980 # endif
981 jne L(find_diff)
982
983 mov -8(%eax), %ecx
984 # ifndef USE_AS_WMEMCMP
985 mov -8(%edx), %ebx
986 cmp %ebx, %ecx
987 # else
988 cmp -8(%edx), %ecx
989 # endif
990 jne L(find_diff)
991
992 mov -4(%eax), %ecx
993 # ifndef USE_AS_WMEMCMP
994 mov -4(%edx), %ebx
995 cmp %ebx, %ecx
996 # else
997 cmp -4(%edx), %ecx
998 # endif
999 mov $0, %eax /* Return value if equal; mov does not modify the flags tested by the jne. */
1000 jne L(find_diff)
1001 RETURN
1002
1003 # ifndef USE_AS_WMEMCMP
1004 .p2align 4
1005 L(less16bytes): /* memcmp: a ptest found a difference in the 16-byte chunk at offset ebx (negative) from eax/edx. */
1006 add %ebx, %eax /* Rebase both pointers to the start of that chunk. */
1007 add %ebx, %edx
1008 /* Locate the first differing dword; ECX/EBX carry the pair into L(find_diff). */
1009 mov (%eax), %ecx
1010 mov (%edx), %ebx
1011 cmp %ebx, %ecx
1012 jne L(find_diff)
1013
1014 mov 4(%eax), %ecx
1015 mov 4(%edx), %ebx
1016 cmp %ebx, %ecx
1017 jne L(find_diff)
1018
1019 mov 8(%eax), %ecx
1020 mov 8(%edx), %ebx
1021 cmp %ebx, %ecx
1022 jne L(find_diff)
1023
1024 mov 12(%eax), %ecx
1025 mov 12(%edx), %ebx
1026 cmp %ebx, %ecx
1027 mov $0, %eax /* mov does not modify the flags tested by the jne. */
1028 jne L(find_diff)
1029 RETURN
1030 # else
1031 .p2align 4
1032 L(less16bytes): /* wmemcmp: a ptest found a difference in the 16-byte chunk at offset ebx (negative) from eax/edx. */
1033 add %ebx, %eax /* Rebase both pointers to the start of that chunk. */
1034 add %ebx, %edx
1035 /* Locate the first differing element; L(find_diff) uses the flags of the cmp. */
1036 mov (%eax), %ecx
1037 cmp (%edx), %ecx
1038 jne L(find_diff)
1039
1040 mov 4(%eax), %ecx
1041 cmp 4(%edx), %ecx
1042 jne L(find_diff)
1043
1044 mov 8(%eax), %ecx
1045 cmp 8(%edx), %ecx
1046 jne L(find_diff)
1047
1048 mov 12(%eax), %ecx
1049 cmp 12(%edx), %ecx
1050
1051 mov $0, %eax /* mov does not modify the flags tested by the jne. */
1052 jne L(find_diff)
1053 RETURN
1054 # endif
1055
1056 .p2align 4
1057 L(find_diff): /* Reached with EBX pushed after a dword mismatch; produces the 1 / -1 return value. */
1058 # ifndef USE_AS_WMEMCMP
1059 cmpb %bl, %cl /* memcmp: ECX/EBX hold the differing dwords; check bytes from the lowest address up. */
1060 jne L(end)
1061 cmp %bx, %cx /* Low bytes are equal, so this word compare is decided by the second byte. */
1062 jne L(end)
1063 shr $16,%ecx /* Bring the upper two bytes down and repeat. */
1064 shr $16,%ebx
1065 cmp %bl, %cl
1066 jne L(end)
1067 cmp %bx, %cx
1068 L(end): /* Flags describe the first differing byte; popl and mov do not modify them. */
1069 POP (%ebx)
1070 mov $1, %eax
1071 ja L(bigger) /* Unsigned: byte of the first block is larger -> return 1. */
1072 neg %eax /* Otherwise return -1. */
1073 L(bigger):
1074 ret
1075 # else
1076 POP (%ebx) /* wmemcmp: the flags of the caller's dword cmp are still live here. */
1077 mov $1, %eax
1078 jg L(bigger) /* Signed: element of the first block is greater -> return 1. */
1079 neg %eax /* Otherwise return -1. */
1080 ret
1081
1082 .p2align 4
1083 L(bigger):
1084 ret
1085 # endif
1086 END (MEMCMP)
1087
1088 .section .rodata.sse4.2,"a",@progbits
1089 .p2align 2
1090 .type L(table_64bytes), @object
1091 # ifndef USE_AS_WMEMCMP
1092 L(table_64bytes): /* memcmp: entry N (0..64) is the handler for N bytes ending at eax/edx; indexed by ECX with scale 4. */
1093 .int JMPTBL (L(0bytes), L(table_64bytes))
1094 .int JMPTBL (L(1bytes), L(table_64bytes))
1095 .int JMPTBL (L(2bytes), L(table_64bytes))
1096 .int JMPTBL (L(3bytes), L(table_64bytes))
1097 .int JMPTBL (L(4bytes), L(table_64bytes))
1098 .int JMPTBL (L(5bytes), L(table_64bytes))
1099 .int JMPTBL (L(6bytes), L(table_64bytes))
1100 .int JMPTBL (L(7bytes), L(table_64bytes))
1101 .int JMPTBL (L(8bytes), L(table_64bytes))
1102 .int JMPTBL (L(9bytes), L(table_64bytes))
1103 .int JMPTBL (L(10bytes), L(table_64bytes))
1104 .int JMPTBL (L(11bytes), L(table_64bytes))
1105 .int JMPTBL (L(12bytes), L(table_64bytes))
1106 .int JMPTBL (L(13bytes), L(table_64bytes))
1107 .int JMPTBL (L(14bytes), L(table_64bytes))
1108 .int JMPTBL (L(15bytes), L(table_64bytes))
1109 .int JMPTBL (L(16bytes), L(table_64bytes))
1110 .int JMPTBL (L(17bytes), L(table_64bytes))
1111 .int JMPTBL (L(18bytes), L(table_64bytes))
1112 .int JMPTBL (L(19bytes), L(table_64bytes))
1113 .int JMPTBL (L(20bytes), L(table_64bytes))
1114 .int JMPTBL (L(21bytes), L(table_64bytes))
1115 .int JMPTBL (L(22bytes), L(table_64bytes))
1116 .int JMPTBL (L(23bytes), L(table_64bytes))
1117 .int JMPTBL (L(24bytes), L(table_64bytes))
1118 .int JMPTBL (L(25bytes), L(table_64bytes))
1119 .int JMPTBL (L(26bytes), L(table_64bytes))
1120 .int JMPTBL (L(27bytes), L(table_64bytes))
1121 .int JMPTBL (L(28bytes), L(table_64bytes))
1122 .int JMPTBL (L(29bytes), L(table_64bytes))
1123 .int JMPTBL (L(30bytes), L(table_64bytes))
1124 .int JMPTBL (L(31bytes), L(table_64bytes))
1125 .int JMPTBL (L(32bytes), L(table_64bytes))
1126 .int JMPTBL (L(33bytes), L(table_64bytes))
1127 .int JMPTBL (L(34bytes), L(table_64bytes))
1128 .int JMPTBL (L(35bytes), L(table_64bytes))
1129 .int JMPTBL (L(36bytes), L(table_64bytes))
1130 .int JMPTBL (L(37bytes), L(table_64bytes))
1131 .int JMPTBL (L(38bytes), L(table_64bytes))
1132 .int JMPTBL (L(39bytes), L(table_64bytes))
1133 .int JMPTBL (L(40bytes), L(table_64bytes))
1134 .int JMPTBL (L(41bytes), L(table_64bytes))
1135 .int JMPTBL (L(42bytes), L(table_64bytes))
1136 .int JMPTBL (L(43bytes), L(table_64bytes))
1137 .int JMPTBL (L(44bytes), L(table_64bytes))
1138 .int JMPTBL (L(45bytes), L(table_64bytes))
1139 .int JMPTBL (L(46bytes), L(table_64bytes))
1140 .int JMPTBL (L(47bytes), L(table_64bytes))
1141 .int JMPTBL (L(48bytes), L(table_64bytes))
1142 .int JMPTBL (L(49bytes), L(table_64bytes))
1143 .int JMPTBL (L(50bytes), L(table_64bytes))
1144 .int JMPTBL (L(51bytes), L(table_64bytes))
1145 .int JMPTBL (L(52bytes), L(table_64bytes))
1146 .int JMPTBL (L(53bytes), L(table_64bytes))
1147 .int JMPTBL (L(54bytes), L(table_64bytes))
1148 .int JMPTBL (L(55bytes), L(table_64bytes))
1149 .int JMPTBL (L(56bytes), L(table_64bytes))
1150 .int JMPTBL (L(57bytes), L(table_64bytes))
1151 .int JMPTBL (L(58bytes), L(table_64bytes))
1152 .int JMPTBL (L(59bytes), L(table_64bytes))
1153 .int JMPTBL (L(60bytes), L(table_64bytes))
1154 .int JMPTBL (L(61bytes), L(table_64bytes))
1155 .int JMPTBL (L(62bytes), L(table_64bytes))
1156 .int JMPTBL (L(63bytes), L(table_64bytes))
1157 .int JMPTBL (L(64bytes), L(table_64bytes))
1158 # else
1159 L(table_64bytes): /* wmemcmp: only byte counts that are multiples of 4 have handlers; every other slot is the L(unreal_case) placeholder. */
1160 .int JMPTBL (L(0bytes), L(table_64bytes))
1161 .int JMPTBL (L(unreal_case), L(table_64bytes))
1162 .int JMPTBL (L(unreal_case), L(table_64bytes))
1163 .int JMPTBL (L(unreal_case), L(table_64bytes))
1164 .int JMPTBL (L(4bytes), L(table_64bytes))
1165 .int JMPTBL (L(unreal_case), L(table_64bytes))
1166 .int JMPTBL (L(unreal_case), L(table_64bytes))
1167 .int JMPTBL (L(unreal_case), L(table_64bytes))
1168 .int JMPTBL (L(8bytes), L(table_64bytes))
1169 .int JMPTBL (L(unreal_case), L(table_64bytes))
1170 .int JMPTBL (L(unreal_case), L(table_64bytes))
1171 .int JMPTBL (L(unreal_case), L(table_64bytes))
1172 .int JMPTBL (L(12bytes), L(table_64bytes))
1173 .int JMPTBL (L(unreal_case), L(table_64bytes))
1174 .int JMPTBL (L(unreal_case), L(table_64bytes))
1175 .int JMPTBL (L(unreal_case), L(table_64bytes))
1176 .int JMPTBL (L(16bytes), L(table_64bytes))
1177 .int JMPTBL (L(unreal_case), L(table_64bytes))
1178 .int JMPTBL (L(unreal_case), L(table_64bytes))
1179 .int JMPTBL (L(unreal_case), L(table_64bytes))
1180 .int JMPTBL (L(20bytes), L(table_64bytes))
1181 .int JMPTBL (L(unreal_case), L(table_64bytes))
1182 .int JMPTBL (L(unreal_case), L(table_64bytes))
1183 .int JMPTBL (L(unreal_case), L(table_64bytes))
1184 .int JMPTBL (L(24bytes), L(table_64bytes))
1185 .int JMPTBL (L(unreal_case), L(table_64bytes))
1186 .int JMPTBL (L(unreal_case), L(table_64bytes))
1187 .int JMPTBL (L(unreal_case), L(table_64bytes))
1188 .int JMPTBL (L(28bytes), L(table_64bytes))
1189 .int JMPTBL (L(unreal_case), L(table_64bytes))
1190 .int JMPTBL (L(unreal_case), L(table_64bytes))
1191 .int JMPTBL (L(unreal_case), L(table_64bytes))
1192 .int JMPTBL (L(32bytes), L(table_64bytes))
1193 .int JMPTBL (L(unreal_case), L(table_64bytes))
1194 .int JMPTBL (L(unreal_case), L(table_64bytes))
1195 .int JMPTBL (L(unreal_case), L(table_64bytes))
1196 .int JMPTBL (L(36bytes), L(table_64bytes))
1197 .int JMPTBL (L(unreal_case), L(table_64bytes))
1198 .int JMPTBL (L(unreal_case), L(table_64bytes))
1199 .int JMPTBL (L(unreal_case), L(table_64bytes))
1200 .int JMPTBL (L(40bytes), L(table_64bytes))
1201 .int JMPTBL (L(unreal_case), L(table_64bytes))
1202 .int JMPTBL (L(unreal_case), L(table_64bytes))
1203 .int JMPTBL (L(unreal_case), L(table_64bytes))
1204 .int JMPTBL (L(44bytes), L(table_64bytes))
1205 .int JMPTBL (L(unreal_case), L(table_64bytes))
1206 .int JMPTBL (L(unreal_case), L(table_64bytes))
1207 .int JMPTBL (L(unreal_case), L(table_64bytes))
1208 .int JMPTBL (L(48bytes), L(table_64bytes))
1209 .int JMPTBL (L(unreal_case), L(table_64bytes))
1210 .int JMPTBL (L(unreal_case), L(table_64bytes))
1211 .int JMPTBL (L(unreal_case), L(table_64bytes))
1212 .int JMPTBL (L(52bytes), L(table_64bytes))
1213 .int JMPTBL (L(unreal_case), L(table_64bytes))
1214 .int JMPTBL (L(unreal_case), L(table_64bytes))
1215 .int JMPTBL (L(unreal_case), L(table_64bytes))
1216 .int JMPTBL (L(56bytes), L(table_64bytes))
1217 .int JMPTBL (L(unreal_case), L(table_64bytes))
1218 .int JMPTBL (L(unreal_case), L(table_64bytes))
1219 .int JMPTBL (L(unreal_case), L(table_64bytes))
1220 .int JMPTBL (L(60bytes), L(table_64bytes))
1221 .int JMPTBL (L(unreal_case), L(table_64bytes))
1222 .int JMPTBL (L(unreal_case), L(table_64bytes))
1223 .int JMPTBL (L(unreal_case), L(table_64bytes))
1224 .int JMPTBL (L(64bytes), L(table_64bytes))
1225 # endif
1226 #endif
This page took 0.090598 seconds and 5 git commands to generate.