sysdeps/arm/memmove.S

   1 /* Copyright (C) 2006, 2008 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 #include <sysdep.h>
  22
  23 /*
  24  * PLD() emits its argument (data preload, ARM V5TE and above) or drops it.
  25  */
  26 #if (defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_3__) \
  27      || defined (__ARM_ARCH_3M__) || defined (__ARM_ARCH_4__) \
  28      || defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_5__) \
  29      || defined (__ARM_ARCH_5T__))
  30 #define PLD(code...)
  31 #else
  32 #define PLD(code...)    code
  33 #endif
  34
  35 /*
  36  * CALGN() wraps the code that cacheline-aligns the source pointer.  It is
  37  * defined empty (disabled) because experiments on the tested architectures
  38  * (StrongARM and XScale) didn't show it to be worthwhile; that may change.
  39  */
  40 //#define CALGN(code...)        code
  41 #define CALGN(code...)
  42
  43 /*
  44  * Byte-order neutral shift names: big-endian builds swap the two directions.
  45  */
  46 #ifdef __ARMEB__
  47 #define pull            lsl
  48 #define push            lsr
  49 #else
  50 #define pull            lsr
  51 #define push            lsl
  52 #endif
  53
  54                 .text
  55
  56 /*
  57  * Prototype: void *memmove(void *dest, const void *src, size_t n);
  58  *
  59  * Note:
  60  *
  61  * If dest <= src or the regions don't overlap, we simply branch to memcpy,
  62  * which is normally a bit faster.  Otherwise the copy is done going downwards.
  63  */
  64
  65 ENTRY(memmove)                                  /* r0 = dest, r1 = src, r2 = n */
  66 /* Branch to memcpy unless dest > src and n > dest - src; that case is copied below, downwards.  */
  67                 subs    ip, r0, r1              /* ip = dest - src */
  68                 cmphi   r2, ip                  /* if dest > src: compare n with ip */
  69 #ifdef NOT_IN_libc
  70                 bls     memcpy                  /* dest <= src, or n <= dest - src */
  71 #else
  72                 bls     HIDDEN_JUMPTARGET(memcpy)  /* same condition */
  73 #endif
  74 /* Save dest (reloaded into r0 at exit); move r0/r1 to the end of each buffer.  */
  75                 stmfd   sp!, {r0, r4, lr}
  76                 add     r1, r1, r2              /* r1 = src + n */
  77                 add     r0, r0, r2              /* r0 = dest + n */
  78                 subs    r2, r2, #4              /* r2 = n - 4 */
  79                 blt     8f                      /* n < 4: byte tail only */
  80                 ands    ip, r0, #3              /* ip = (dest + n) & 3 */
  81         PLD(    pld     [r1, #-4]               )
  82                 bne     9f                      /* dest end not word aligned */
  83                 ands    ip, r1, #3              /* ip = (src + n) & 3 */
  84                 bne     10f                     /* src end not word aligned */
  85 /* 1: r0 and r1 are both word aligned here.  */
  86 1:              subs    r2, r2, #(28)           /* r2 = bytes left - 32 */
  87                 stmfd   sp!, {r5 - r8}          /* stmfd leaves the flags alone */
  88                 blt     5f                      /* under 32 bytes left */
  89 /* CALGN() is defined empty near the top of this file, so these lines emit no code.  */
  90         CALGN(  ands    ip, r1, #31             )
  91         CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
  92         CALGN(  bcs     2f                      )
  93         CALGN(  adr     r4, 6f                  )
  94         CALGN(  subs    r2, r2, ip              )  @ C is set here
  95         CALGN(  add     pc, r4, ip              )
  96
  97         PLD(    pld     [r1, #-4]               )
  98 2:      PLD(    subs    r2, r2, #96             )  /* extra -96 bias while preloading */
  99         PLD(    pld     [r1, #-32]              )
 100         PLD(    blt     4f                      )  /* r2 < 0: skip the farther preloads */
 101         PLD(    pld     [r1, #-64]              )
 102         PLD(    pld     [r1, #-96]              )
 103 /* 3/4: move 32 bytes per pass, highest addresses first (entering at 4 skips the pld).  */
 104 3:      PLD(    pld     [r1, #-128]             )
 105 4:              ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}  /* 8 words below r1 */
 106                 subs    r2, r2, #32
 107                 stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}  /* ... to below r0 */
 108                 bge     3b                      /* r2 still >= 0: go round again */
 109         PLD(    cmn     r2, #96                 )  /* r2 + 96 >= 0: a full 32-byte block remains */
 110         PLD(    bge     4b                      )
 111 /* 5: copy the r2 & 28 bytes of leftover whole words by jumping into the runs below.  */
 112 5:              ands    ip, r2, #28             /* ip = 4 * leftover words (0-28) */
 113                 rsb     ip, ip, #32             /* ip = 4 * (8 - words): code bytes to skip */
 114                 addne   pc, pc, ip              @ C is always clear here
 115                 b       7f                      /* no whole word left */
 116 6:              nop                             /* addne lands ip bytes past here (pc reads as insn + 8) */
 117                 ldr     r3, [r1, #-4]!
 118                 ldr     r4, [r1, #-4]!
 119                 ldr     r5, [r1, #-4]!
 120                 ldr     r6, [r1, #-4]!
 121                 ldr     r7, [r1, #-4]!
 122                 ldr     r8, [r1, #-4]!
 123                 ldr     lr, [r1, #-4]!
 124 /* Skip the same number of str slots; pc here reads as the second nop.  */
 125                 add     pc, pc, ip
 126                 nop
 127                 nop
 128                 str     r3, [r0, #-4]!
 129                 str     r4, [r0, #-4]!
 130                 str     r5, [r0, #-4]!
 131                 str     r6, [r0, #-4]!
 132                 str     r7, [r0, #-4]!
 133                 str     r8, [r0, #-4]!
 134                 str     lr, [r0, #-4]!
 135
 136         CALGN(  bcs     2b                      )
 137
 138 7:              ldmfd   sp!, {r5 - r8}          /* undo the stmfd at 1 */
 139 /* 8: copy the last 0-3 bytes; bits 0-1 of r2 hold that count.  */
 140 8:              movs    r2, r2, lsl #31         /* NE <=> bit 0, CS <=> bit 1 */
 141                 ldrneb  r3, [r1, #-1]!          /* 1 byte if bit 0 */
 142                 ldrcsb  r4, [r1, #-1]!          /* 2 bytes if bit 1 */
 143                 ldrcsb  ip, [r1, #-1]
 144                 strneb  r3, [r0, #-1]!
 145                 strcsb  r4, [r0, #-1]!
 146                 strcsb  ip, [r0, #-1]
 147                 ldmfd   sp!, {r0, r4, pc}       /* r0 = saved dest; return */
 148 /* 9: copy ip (1-3) bytes so that r0 becomes word aligned.  */
 149 9:              cmp     ip, #2
 150                 ldrgtb  r3, [r1, #-1]!          /* ip == 3 */
 151                 ldrgeb  r4, [r1, #-1]!          /* ip >= 2 */
 152                 ldrb    lr, [r1, #-1]!          /* always */
 153                 strgtb  r3, [r0, #-1]!
 154                 strgeb  r4, [r0, #-1]!
 155                 subs    r2, r2, ip              /* ip bytes done */
 156                 strb    lr, [r0, #-1]!
 157                 blt     8b                      /* under 4 bytes left */
 158                 ands    ip, r1, #3
 159                 beq     1b                      /* src aligned too */
 160 /* 10: src is ip (1-3) bytes past a word boundary; copy via aligned loads and shifts.  */
 161 10:             bic     r1, r1, #3              /* r1 rounded down to a word */
 162                 cmp     ip, #2
 163                 ldr     r3, [r1, #0]            /* first partial source word */
 164                 beq     17f                     /* ip == 2 */
 165                 blt     18f                     /* ip == 1 (ip == 3 falls through) */
 166
 167 /* Macro body: backward word copy with src misaligned by (pull / 8) bytes; r3 carries over.  */
 168                 .macro  backward_copy_shift push pull
 169 /* Each stored word = (higher src word push #push) | (next lower src word pull #pull).  */
 170                 subs    r2, r2, #28             /* r2 = bytes left - 32 */
 171                 blt     14f                     /* under 32 bytes left */
 172
 173         CALGN(  ands    ip, r1, #31             )
 174         CALGN(  rsb     ip, ip, #32             )
 175         CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
 176         CALGN(  subcc   r2, r2, ip              )
 177         CALGN(  bcc     15f                     )
 178
 179 11:             stmfd   sp!, {r5 - r9}
 180
 181         PLD(    pld     [r1, #-4]               )
 182         PLD(    subs    r2, r2, #96             )
 183         PLD(    pld     [r1, #-32]              )
 184         PLD(    blt     13f                     )
 185         PLD(    pld     [r1, #-64]              )
 186         PLD(    pld     [r1, #-96]              )
 187
 188 12:     PLD(    pld     [r1, #-128]             )
 189 13:             ldmdb   r1!, {r7, r8, r9, ip}   /* upper 4 words */
 190                 mov     lr, r3, push #\push     /* r3 = word kept from before */
 191                 subs    r2, r2, #32
 192                 ldmdb   r1!, {r3, r4, r5, r6}   /* lower 4 words; new r3 */
 193                 orr     lr, lr, ip, pull #\pull
 194                 mov     ip, ip, push #\push
 195                 orr     ip, ip, r9, pull #\pull
 196                 mov     r9, r9, push #\push
 197                 orr     r9, r9, r8, pull #\pull
 198                 mov     r8, r8, push #\push
 199                 orr     r8, r8, r7, pull #\pull
 200                 mov     r7, r7, push #\push
 201                 orr     r7, r7, r6, pull #\pull
 202                 mov     r6, r6, push #\push
 203                 orr     r6, r6, r5, pull #\pull
 204                 mov     r5, r5, push #\push
 205                 orr     r5, r5, r4, pull #\pull
 206                 mov     r4, r4, push #\push
 207                 orr     r4, r4, r3, pull #\pull
 208                 stmdb   r0!, {r4 - r9, ip, lr}  /* store 8 merged words */
 209                 bge     12b
 210         PLD(    cmn     r2, #96                 )
 211         PLD(    bge     13b                     )
 212
 213                 ldmfd   sp!, {r5 - r9}
 214 /* 14/15: same merge, one word per pass, for the leftover r2 & 28 bytes.  */
 215 14:             ands    ip, r2, #28
 216                 beq     16f
 217
 218 15:             mov     lr, r3, push #\push
 219                 ldr     r3, [r1, #-4]!
 220                 subs    ip, ip, #4              /* one word done */
 221                 orr     lr, lr, r3, pull #\pull
 222                 str     lr, [r0, #-4]!
 223                 bgt     15b
 224         CALGN(  cmp     r2, #0                  )
 225         CALGN(  bge     11b                     )
 226 /* 16: add back the byte offset that bic removed from r1, then copy the byte tail.  */
 227 16:             add     r1, r1, #(\pull / 8)
 228                 b       8b
 229
 230                 .endm
 231
 232 /* ip == 3 at label 10 (fall-through).  */
 233                 backward_copy_shift     push=8  pull=24
 234 /* ip == 2 at label 10.  */
 235 17:             backward_copy_shift     push=16 pull=16
 236 /* ip == 1 at label 10.  */
 237 18:             backward_copy_shift     push=24 pull=8
 238
 239
 240 END(memmove)
 241 libc_hidden_builtin_def (memmove)