From: H.J. Lu
Date: Mon, 30 Oct 2017 17:02:16 +0000 (-0700)
Subject: i586: Use conditional branches in strcpy.S [BZ #22353]
X-Git-Tag: glibc-2.27~575
X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=c5cc45148c89cc5c57d1946348dd242d4db5c5f5;p=glibc.git

i586: Use conditional branches in strcpy.S [BZ #22353]

i586 strcpy.S used a clever trick with LEA to implement a jump table:

	/* ECX has the last 2 bits of the address of source - 1. */
	andl $3, %ecx

	call 2f
2:	popl %edx
	/* 0xb is the distance between 2: and 1:. */
	leal 0xb(%edx,%ecx,8), %ecx
	jmp *%ecx

	.align 8
1:	/* ECX == 0 */
	orb (%esi), %al
	jz L(end)
	stosb
	xorl %eax, %eax
	incl %esi
	/* ECX == 1 */
	orb (%esi), %al
	jz L(end)
	stosb
	xorl %eax, %eax
	incl %esi
	/* ECX == 2 */
	orb (%esi), %al
	jz L(end)
	stosb
	xorl %eax, %eax
	incl %esi
	/* ECX == 3 */
L(1):	movl (%esi), %ecx
	leal 4(%esi),%esi

This fails if there are instruction length changes before L(1):. This
patch replaces it with conditional branches:

	cmpb $2, %cl
	je L(Src2)
	ja L(Src3)
	cmpb $1, %cl
	je L(Src1)

L(Src0):

which have similar performance and work with any instruction lengths.

Tested on i586 and i686 with and without --disable-multi-arch.

	[BZ #22353]
	* sysdeps/i386/i586/strcpy.S (STRCPY): Use conditional branches.
	(1): Renamed to ...
	(L(Src0)): This.
	(L(Src1)): New.
	(L(Src2)): Likewise.
	(L(1)): Renamed to ...
	(L(Src3)): This.
---

diff --git a/ChangeLog b/ChangeLog
index 59612389c9..5ea3d856a1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2017-10-30 H.J. Lu
+
+	[BZ #22353]
+	* sysdeps/i386/i586/strcpy.S (STRCPY): Use conditional branches.
+	(1): Renamed to ...
+	(L(Src0)): This.
+	(L(Src1)): New.
+	(L(Src2)): Likewise.
+	(L(1)): Renamed to ...
+	(L(Src3)): This.
+
 2017-10-30 Joseph Myers
 
 	* math/math.h [__HAVE_FLOAT16 && __USE_GNU] (M_Ef16): New macro.
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S index a444604f4f..bb73ca4ef3 100644 --- a/sysdeps/i386/i586/strcpy.S +++ b/sysdeps/i386/i586/strcpy.S @@ -53,41 +53,35 @@ ENTRY (STRCPY) cfi_rel_offset (ebx, 0) andl $3, %ecx -#ifdef PIC - call 2f - cfi_adjust_cfa_offset (4) -2: popl %edx - cfi_adjust_cfa_offset (-4) - /* 0xb is the distance between 2: and 1: but we avoid writing - 1f-2b because the assembler generates worse code. */ - leal 0xb(%edx,%ecx,8), %ecx -#else - leal 1f(,%ecx,8), %ecx -#endif - - jmp *%ecx + cmpb $2, %cl + je L(Src2) + ja L(Src3) + cmpb $1, %cl + je L(Src1) - .align 8 -1: +L(Src0): orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi +L(Src1): orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi +L(Src2): orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi -L(1): movl (%esi), %ecx +L(Src3): + movl (%esi), %ecx leal 4(%esi),%esi subl %ecx, %eax @@ -107,7 +101,7 @@ L(1): movl (%esi), %ecx movl %edx, (%edi) leal 4(%edi),%edi - jmp L(1) + jmp L(Src3) L(3): movl %ecx, %edx