X86-64: Prepare memmove-vec-unaligned-erms.S

author H.J. Lu <hjl.tools@gmail.com>
Wed, 6 Apr 2016 17:19:16 +0000 (10:19 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
Wed, 6 Apr 2016 17:19:16 +0000 (10:19 -0700)
diff --git a/ChangeLog b/ChangeLog

index c801aff3f393c36936ba3dea3733982df7906baa..a10b8c43e2d3c77a4d028253c1b57735869ab98a 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2016-04-06   H.J. Lu  <hongjiu.lu@intel.com>
+
+       * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+       (MEMCPY_SYMBOL): New.
+       (MEMPCPY_SYMBOL): Likewise.
+       (MEMMOVE_CHK_SYMBOL): Likewise.
+       Replace MEMMOVE_SYMBOL with MEMMOVE_CHK_SYMBOL on __mempcpy_chk
+       symbols.  Replace MEMMOVE_SYMBOL with MEMPCPY_SYMBOL on
+       __mempcpy symbols.  Provide alias for __memcpy_chk in libc.a.
+       Provide alias for memcpy in libc.a and ld.so.
+
  2016-04-06   H.J. Lu  <hongjiu.lu@intel.com>
  
         * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S

index 66779a3bec17f9144493a4c8e5baa3403c4edc95..8a60d0ff029a5d0864977b2270bbdb215abe9618 100644 (file)
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -32,18 +32,27 @@
        8 * VEC_SIZE at a time.
     8. Otherwise, forward copy 8 * VEC_SIZE at a time.  */
  
-#if IS_IN (libc)
+#include <sysdep.h>
  
-# include <sysdep.h>
-# include "asm-syntax.h"
+#ifndef MEMCPY_SYMBOL
+# define MEMCPY_SYMBOL(p,s)            MEMMOVE_SYMBOL(p, s)
+#endif
  
-# ifndef VZEROUPPER
-#  if VEC_SIZE > 16
-#   define VZEROUPPER vzeroupper
-#  else
-#   define VZEROUPPER
-#  endif
+#ifndef MEMPCPY_SYMBOL
+# define MEMPCPY_SYMBOL(p,s)           MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMMOVE_CHK_SYMBOL
+# define MEMMOVE_CHK_SYMBOL(p,s)       MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef VZEROUPPER
+# if VEC_SIZE > 16
+#  define VZEROUPPER vzeroupper
+# else
+#  define VZEROUPPER
  # endif
+#endif
  
  /* Threshold to use Enhanced REP MOVSB.  Since there is overhead to set
     up REP MOVSB operation, REP MOVSB isn't faster on short data.  The
@@ -52,32 +61,36 @@
     on processors with Enhanced REP MOVSB.  Since larger register size
     can move more data with a single load and store, the threshold is
     higher with larger register size.  */
-# ifndef REP_MOVSB_THRESHOLD
-#  define REP_MOVSB_THRESHOLD  (2048 * (VEC_SIZE / 16))
-# endif
+#ifndef REP_MOVSB_THRESHOLD
+# define REP_MOVSB_THRESHOLD   (2048 * (VEC_SIZE / 16))
+#endif
  
-# ifndef SECTION
-#  error SECTION is not defined!
-# endif
-       .section SECTION(.text),"ax",@progbits
+#ifndef SECTION
+# error SECTION is not defined!
+#endif
  
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_2))
+       .section SECTION(.text),"ax",@progbits
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_2))
         cmpq    %rdx, %rcx
         jb      HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_2))
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_2))
+#endif
  
-ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_2))
+#if VEC_SIZE == 16 || defined SHARED
+ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned_2))
         movq    %rdi, %rax
         addq    %rdx, %rax
         jmp     L(start)
-END (MEMMOVE_SYMBOL (__mempcpy, unaligned_2))
+END (MEMPCPY_SYMBOL (__mempcpy, unaligned_2))
+#endif
  
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2))
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2))
         cmpq    %rdx, %rcx
         jb      HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2))
-# endif
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2))
+#endif
  
  ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_2))
         movq    %rdi, %rax
@@ -86,24 +99,29 @@ L(start):
         jb      L(less_vec)
         cmpq    $(VEC_SIZE * 2), %rdx
         ja      L(more_2x_vec)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(last_2x_vec):
+#endif
         /* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
         VMOVU   (%rsi), %VEC(0)
         VMOVU   -VEC_SIZE(%rsi,%rdx), %VEC(1)
         VMOVU   %VEC(0), (%rdi)
         VMOVU   %VEC(1), -VEC_SIZE(%rdi,%rdx)
         VZEROUPPER
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(nop):
+#endif
         ret
+#if defined USE_MULTIARCH && IS_IN (libc)
  END (MEMMOVE_SYMBOL (__memmove, unaligned_2))
  
-# if VEC_SIZE == 16
+# if VEC_SIZE == 16 && defined SHARED
  /* Only used to measure performance of REP MOVSB.  */
-#  ifdef SHARED
  ENTRY (__mempcpy_erms)
         movq    %rdi, %rax
         addq    %rdx, %rax
         jmp     L(start_movsb)
  END (__mempcpy_erms)
-#  endif
  
  ENTRY (__memmove_erms)
         movq    %rdi, %rax
@@ -132,11 +150,10 @@ strong_alias (__memmove_erms, __memcpy_erms)
  # endif
  
  # ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
         cmpq    %rdx, %rcx
         jb      HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-# endif
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
  
  ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
         movq    %rdi, %rax
@@ -144,11 +161,10 @@ ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
         jmp     L(start_erms)
  END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
  
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
         cmpq    %rdx, %rcx
         jb      HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
  # endif
  
  ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
@@ -192,6 +208,7 @@ L(movsb_more_2x_vec):
         /* Force 32-bit displacement to avoid long nop between
            instructions.  */
         ja.d32  L(movsb)
+#endif
         .p2align 4
  L(more_2x_vec):
         /* More than 2 * VEC.  */
@@ -227,13 +244,19 @@ L(copy_forward):
         VMOVU   %VEC(2), -(VEC_SIZE * 3)(%rdi,%rdx)
         VMOVU   %VEC(3), -(VEC_SIZE * 4)(%rdi,%rdx)
         cmpq    $(VEC_SIZE * 8), %rdx
-# if  VEC_SIZE == 16
+#if  VEC_SIZE == 16
+# if defined USE_MULTIARCH && IS_IN (libc)
         jbe     L(return)
  # else
+       /* Use 32-bit displacement to avoid long nop between
+          instructions.  */
+       jbe.d32 L(return)
+# endif
+#else
         /* Use 8-bit displacement to avoid long nop between
            instructions.  */
         jbe     L(return_disp8)
-# endif
+#endif
         leaq    (VEC_SIZE * 4)(%rdi), %rcx
         addq    %rdi, %rdx
         andq    $-(VEC_SIZE * 4), %rdx
@@ -263,22 +286,25 @@ L(loop):
         addq    $(VEC_SIZE * 4), %rcx
         cmpq    %rcx, %rdx
         jne     L(loop)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(return):
+#endif
  L(return_disp8):
         VZEROUPPER
         ret
  L(less_vec):
         /* Less than 1 VEC.  */
-# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
-#  error Unsupported VEC_SIZE!
-# endif
-# if VEC_SIZE > 32
+#if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+# error Unsupported VEC_SIZE!
+#endif
+#if VEC_SIZE > 32
         cmpb    $32, %dl
         jae     L(between_32_63)
-# endif
-# if VEC_SIZE > 16
+#endif
+#if VEC_SIZE > 16
         cmpb    $16, %dl
         jae     L(between_16_31)
-# endif
+#endif
         cmpb    $8, %dl
         jae     L(between_8_15)
         cmpb    $4, %dl
@@ -290,7 +316,7 @@ L(less_vec):
         movb    %cl, (%rdi)
  1:
         ret
-# if VEC_SIZE > 32
+#if VEC_SIZE > 32
  L(between_32_63):
         /* From 32 to 63.  No branch when size == 32.  */
         vmovdqu (%rsi), %ymm0
@@ -299,8 +325,8 @@ L(between_32_63):
         vmovdqu %ymm1, -32(%rdi,%rdx)
         VZEROUPPER
         ret
-# endif
-# if VEC_SIZE > 16
+#endif
+#if VEC_SIZE > 16
         /* From 16 to 31.  No branch when size == 16.  */
  L(between_16_31):
         vmovdqu (%rsi), %xmm0
@@ -308,7 +334,7 @@ L(between_16_31):
         vmovdqu %xmm0, (%rdi)
         vmovdqu %xmm1, -16(%rdi,%rdx)
         ret
-# endif
+#endif
  L(between_8_15):
         /* From 8 to 15.  No branch when size == 8.  */
         movq    -8(%rsi,%rdx), %rcx
@@ -331,10 +357,10 @@ L(between_2_3):
         movw    %si, (%rdi)
         ret
  
-# if VEC_SIZE > 16
+#if VEC_SIZE > 16
         /* Align to 16 bytes to avoid long nop between instructions.  */
         .p2align 4
-# endif
+#endif
  L(more_2x_vec_overlap):
         /* More than 2 * VEC and there is overlap between destination
            and source.  */
@@ -454,15 +480,19 @@ L(loop_8x_vec_backward):
         jmp     L(between_4x_vec_and_8x_vec)
  END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
  
-# ifdef SHARED
+#ifdef SHARED
+# if IS_IN (libc)
+#  ifdef USE_MULTIARCH
  strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
               MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
  strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
               MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
-strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_2),
-             MEMMOVE_SYMBOL (__memcpy, unaligned_2))
-strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2),
-             MEMMOVE_SYMBOL (__memcpy_chk, unaligned_2))
+#  endif
+strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2),
+             MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned_2))
  # endif
-
+#endif
+#if VEC_SIZE == 16 || defined SHARED
+strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_2),
+             MEMCPY_SYMBOL (__memcpy, unaligned_2))
  #endif
author	H.J. Lu <hjl.tools@gmail.com>
	Wed, 6 Apr 2016 17:19:16 +0000 (10:19 -0700)
committer	H.J. Lu <hjl.tools@gmail.com>
	Wed, 6 Apr 2016 17:19:16 +0000 (10:19 -0700)
ChangeLog		patch \| blob \| blame \| history
sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S		patch \| blob \| blame \| history