This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hello All, Please find below a patch adding an optimized implementation of 'memcpy' for the PowerPC e6500 (32-bit and 64-bit) target, using AltiVec instructions. 2015-08-31 Rohit Arul Raj <rohitarulraj@freescale.com> * sysdeps/powerpc/powerpc32/e6500/memcpy.S: New File: optimized memcpy implementation using altivec instructions. * sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add check for e6500 memcpy function. * sysdeps/powerpc/powerpc32/power4/multiarch/Makefile: Add memcpy-e6500 object. * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c: Add check for e6500 memcpy function. * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S: New File: multiarch e6500 memcpy. * sysdeps/powerpc/powerpc64/e6500/memcpy.S: New File: optimized memcpy implementation using altivec instructions. * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add check for e6500 memcpy function. * sysdeps/powerpc/powerpc64/multiarch/Makefile: Add memcpy-e6500 object. * sysdeps/powerpc/powerpc64/multiarch/memcpy.c: Add check for e6500 memcpy function. * sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S: New File: multiarch e6500 memcpy. diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/e6500/memcpy.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/e6500/memcpy.S --- glibc-2.20/sysdeps/powerpc/powerpc32/e6500/memcpy.S 1969-12-31 18:00:00.000000000 -0600 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/e6500/memcpy.S 2015-08-29 15:45:37.044421872 -0500 @@ -0,0 +1,212 @@ +/* Optimized memcpy implementation for e6500 32-bit PowerPC. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + r3 = destination + r4 = source + r5 = byte count + + volatile fixed point registers usable: + r0, r3-r12 + + volatile floating point registers usable: + f0-f13. */ + +EALIGN (memcpy, 5, 0) + cmplw cr0, r4, r3 /* if source==destination, return. */ + beqlr cr0 + /* If fewer than 16 bytes are requested (threshold not yet tuned), + skip dst alignment and the 64-byte loops; use the scalar tail copy. */ + cmplwi r5, 16 + mr r6, r3 /* r6 = working dst; r3 stays intact as the return value. */ + blt L(copy_remaining) + neg r0, r3 + andi. r11, r0, 15 /* r11 = (-dst) & 15: bytes up to the next 16-byte boundary. */ + beq L(dst_align16) + lwz r0, 0(r4) /* Copy 16 bytes, but advance by r11 only. */ + lwz r7, 4(r4) + lwz r8, 8(r4) + lwz r9, 12(r4) + subf r5, r11, r5 + add r4, r4, r11 + stw r0, 0(r6) + stw r7, 4(r6) + stw r8, 8(r6) + stw r9, 12(r6) + add r6, r6, r11 /* dst is now 16-byte aligned. */ +L(dst_align16): + cmplwi 7, r5, 63 + ble 7, L(copy_remaining) /* Fewer than 64 bytes left. */ + srwi r11, r5, 6 /* No of 64 byte copy count. */ + rlwinm r5, r5, 0, 26, 31 /* remaining bytes. */ + rlwinm. r0, r4, 0, 28, 31 /* r0 = src & 15. */ + mtctr r11 + li r7, 16 /* r7, r8, r9 = index offsets for lvx/stvx. */ + li r8, 32 + li r9, 48 + bne 0, L(src_naligned) /* src is not 16-byte aligned. */ +L(copy_salign16): + lvx v14, 0, r4 /* copy 64 bytes. */ + lvx v15, r7, r4 + lvx v16, r8, r4 + lvx v17, r9, r4 + addi r4, r4, 64 + stvx v14, 0, r6 + stvx v15, r7, r6 + stvx v16, r8, r6 + stvx v17, r9, r6 + addi r6, r6, 64 + bdnz L(copy_salign16) +L(copy_remaining): + srwi. r11, r5, 3 /* No of 8 byte copy count. */ + rlwinm r5, r5, 0, 29, 31 /* remaining bytes. 
 */ + beq 0, L(copy_bytes) + mtcrf 0x01, r11 /* cr7 = low 4 bits of the 8-byte count. */ + bf cr7*4+1, L(cp16b) + + lwz r0, 0(r4) /* copy 32 bytes */ + lwz r7, 4(r4) + lwz r8, 8(r4) + lwz r9, 12(r4) + + stw r0, 0(r6) + stw r7, 4(r6) + stw r8, 8(r6) + stw r9, 12(r6) + + lwz r0, 16(r4) + lwz r7, 20(r4) + lwz r8, 24(r4) + lwz r9, 28(r4) + addi r4, r4, 32 + + stw r0, 16(r6) + stw r7, 20(r6) + stw r8, 24(r6) + stw r9, 28(r6) + addi r6, r6, 32 +L(cp16b): + bf cr7*4+2, L(cp8b) + lwz r0, 0(r4) /* copy 16 bytes */ + lwz r7, 4(r4) + lwz r8, 8(r4) + lwz r9, 12(r4) + + addi r4, r4, 16 + + stw r0, 0(r6) + stw r7, 4(r6) + stw r8, 8(r6) + stw r9, 12(r6) + addi r6, r6, 16 +L(cp8b): + bf cr7*4+3, L(copy_bytes) + lwz r0, 0(r4) /* copy 8 bytes */ + lwz r7, 4(r4) + addi r4, r4, 8 + + stw r0, 0(r6) + stw r7, 4(r6) + addi r6, r6, 8 +L(copy_bytes): + cmplwi cr1, r5, 4 + cmplwi cr0, r5, 1 + bgt cr1, L(gt4b) /* nb > 4? (5, 6, 7 bytes). */ + ble cr0, L(lt1b) /* nb <= 1? (0, 1 bytes). */ + addi r0, r5, -2 /* 2, 3, 4 bytes. */ + lhz r9, 0(r4) + lhzx r11, r4, r0 /* last 2 bytes; may overlap the first 2. */ + sth r9, 0(r6) + sthx r11, r6, r0 + blr +L(gt4b): + addi r0, r5, -4 /* 5, 6, 7 bytes. */ + lwz r9, 0(r4) + lwzx r11, r4, r0 /* last 4 bytes; may overlap the first 4. */ + stw r9, 0(r6) + stwx r11, r6, r0 + blr +L(lt1b): + mtocrf 0x1, r5 /* nb == 0 ? return. */ + bflr 31 + lbz r0, 0(r4) /* nb == 1. */ + stb r0, 0(r6) + blr + +L(src_naligned): +#ifndef _SOFT_FLOAT + rlwinm. r0, r4, 0, 29, 31 /* r0 = src & 7. */ + beq 0, L(copy_salign8) /* src is 8-byte aligned. */ +#endif +L(copy_snalign): /* copy 64 bytes. */ + lvx v0, 0, r4 /* load MSQ. */ + lvsl v18, 0, r4 /* set permute control vector. */ + lvx v19, r7, r4 /* load LSQ. */ + vperm v14, v0, v19, v18 /* align the data. */ + lvx v0, r7, r4 /* load MSQ. */ + lvsl v18, r7, r4 /* set permute control vector. */ + lvx v19, r8, r4 /* load LSQ. */ + vperm v15, v0, v19, v18 /* align the data. */ + lvx v0, r8, r4 /* load MSQ. */ + lvsl v18, r8, r4 /* set permute control vector. */ + lvx v19, r9, r4 /* load LSQ. */ + vperm v16, v0, v19, v18 /* align the data. */ + lvx v0, r9, r4 /* load MSQ. 
 */ + lvsl v18, r9, r4 /* set permute control vector. */ + addi r4, r4, 64 + lvx v19, 0, r4 /* load LSQ. */ + vperm v17, v0, v19, v18 /* align the data. */ + stvx v14, 0, r6 + stvx v15, r7, r6 + stvx v16, r8, r6 + stvx v17, r9, r6 + addi r6, r6, 64 + bdnz L(copy_snalign) + b L(copy_remaining) /* finish the tail (fewer than 64 bytes). */ + +#ifndef _SOFT_FLOAT +L(copy_salign8): + lfd 0, 0(r4) /* copy 64 bytes through FPRs, 8 bytes at a time. */ + lfd 1, 8(r4) + lfd 2, 16(r4) + lfd 3, 24(r4) + stfd 0, 0(r6) + stfd 1, 8(r6) + stfd 2, 16(r6) + stfd 3, 24(r6) + lfd 0, 32(r4) + lfd 1, 40(r4) + lfd 2, 48(r4) + lfd 3, 56(r4) + addi r4, r4, 64 + stfd 0, 32(r6) + stfd 1, 40(r6) + stfd 2, 48(r6) + stfd 3, 56(r6) + addi r6, r6, 64 + bdnz L(copy_salign8) + b L(copy_remaining) +#endif + +END (memcpy) +libc_hidden_builtin_def (memcpy) diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c --- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c 2015-08-29 15:42:09.769408236 -0500 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c 2015-08-29 15:45:37.044421872 -0500 @@ -58,6 +58,10 @@ __memcpy_power6) IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE, __memcpy_cell) + IFUNC_IMPL_ADD (array, i, memcpy, + (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500) + && (hwcap2 & PPC_FEATURE2_HAS_ISEL)), + __memcpy_e6500) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c. 
*/ diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile --- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile 2015-08-29 15:42:09.769408236 -0500 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile 2015-08-29 15:46:34.217426773 -0500 @@ -1,7 +1,7 @@ ifeq ($(subdir),string) sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ - memcpy-ppc32 memcmp-power7 memcmp-e6500 memcmp-ppc32 \ - memset-power7 memset-power6 memset-ppc32 \ + memcpy-e6500 memcpy-ppc32 memcmp-power7 memcmp-e6500 \ + memcmp-ppc32 memset-power7 memset-power6 memset-ppc32 \ bzero-power7 bzero-power6 bzero-ppc32 \ mempcpy-power7 mempcpy-ppc32 memchr-power7 \ memchr-ppc32 memrchr-power7 memrchr-ppc32 rawmemchr-power7 \ diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c --- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c 2015-08-29 15:41:52.333407557 -0500 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c 2015-08-29 15:45:37.044421872 -0500 @@ -25,6 +25,7 @@ # include "init-arch.h" extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (memcpy) __memcpy_e6500 attribute_hidden; extern __typeof (memcpy) __memcpy_cell attribute_hidden; extern __typeof (memcpy) __memcpy_power6 attribute_hidden; extern __typeof (memcpy) __memcpy_a2 attribute_hidden; @@ -40,6 +41,9 @@ (hwcap & PPC_FEATURE_ARCH_2_05) ? __memcpy_power6 : (hwcap & PPC_FEATURE_CELL_BE) - ? __memcpy_cell + ? __memcpy_cell : + (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500) + && (hwcap2 & PPC_FEATURE2_HAS_ISEL)) + ? 
__memcpy_e6500 : __memcpy_ppc); #endif diff -Naur glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S --- glibc-2.20/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S 1969-12-31 18:00:00.000000000 -0600 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-e6500.S 2015-08-29 15:45:37.045421842 -0500 @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for PowerPC32/e6500. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_e6500); \ + .type C_SYMBOL_NAME(__memcpy_e6500),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_e6500) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_e6500) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/e6500/memcpy.S> diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/e6500/memcpy.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/e6500/memcpy.S --- glibc-2.20/sysdeps/powerpc/powerpc64/e6500/memcpy.S 1969-12-31 18:00:00.000000000 -0600 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/e6500/memcpy.S 2015-08-29 15:45:37.045421842 -0500 @@ -0,0 +1,184 @@ +/* Optimized memcpy implementation for e6500 64-bit PowerPC. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. 
+ + r3 = destination + r4 = source + r5 = byte count + + volatile fixed point registers usable: + r0, r3-r12 + + volatile floating point registers usable: + f0-f13. */ + +EALIGN (memcpy, 5, 0) + CALL_MCOUNT 3 + cmpld cr0, r4, r3 /* if source==destination, return. */ + beqlr cr0 + /* If the byte count is 16 or less, skip dst alignment and the + 64-byte loops; use the scalar tail copy. */ + cmpldi r5, 16 + mr r6, r3 /* r6 = working dst; r3 stays intact as the return value. */ + ble L(copy_remaining) + neg r0, r3 + andi. r11, r0, 15 /* r11 = (-dst) & 15: bytes up to the next 16-byte boundary. */ + beq L(dst_align) + ld r12, 0(r4) /* Copy 16 bytes, but advance by r11 only. */ + ld r0, 8(r4) + subf r5, r11, r5 + add r4, r4, r11 + std r12, 0(r6) + std r0, 8(r6) + add r6, r6, r11 /* dst is now 16-byte aligned. */ +L(dst_align): + cmpldi 7, r5, 63 + ble 7, L(copy_remaining) /* Fewer than 64 bytes left. */ + srdi r11, r5, 6 /* No of 64 byte copy count; len is 64-bit, so shift the full doubleword. */ + rlwinm r5, r5, 0, 26, 31 /* remaining bytes. */ + rlwinm. r0, r4, 0, 28, 31 /* r0 = src & 15. */ + mtctr r11 + li r7, 16 /* r7, r8, r9 = index offsets for lvx/stvx. */ + li r8, 32 + li r9, 48 + bne 0, L(src_naligned) /* src is not 16-byte aligned. */ +L(copy_salign): + lvx v14, 0, r4 /* copy 64 bytes. */ + lvx v15, r7, r4 + lvx v16, r8, r4 + lvx v17, r9, r4 + addi r4, r4, 64 + stvx v14, 0, r6 + stvx v15, r7, r6 + stvx v16, r8, r6 + stvx v17, r9, r6 + addi r6, r6, 64 + bdnz L(copy_salign) +L(copy_remaining): + srwi. r11, r5, 3 /* No of 8 byte copy count. */ + rlwinm r5, r5, 0, 29, 31 /* remaining bytes. */ + beq 0, L(copy_bytes) + mtcrf 0x01, r11 /* cr7 = low 4 bits of the 8-byte count. */ + bf cr7*4+1, L(cp16b) + ld r0, 0(r4) /* copy 32 bytes. */ + ld r7, 8(r4) + ld r8, 16(r4) + ld r9, 24(r4) + addi r4, r4, 32 + std r0, 0(r6) + std r7, 8(r6) + std r8, 16(r6) + std r9, 24(r6) + addi r6, r6, 32 +L(cp16b): + bf cr7*4+2, L(cp8b) + ld r7, 0(r4) /* copy 16 bytes. */ + ld r8, 8(r4) + addi r4, r4, 16 + std r7, 0(r6) + std r8, 8(r6) + addi r6, r6, 16 +L(cp8b): + bf cr7*4+3, L(copy_bytes) + ld r7, 0(r4) /* copy 8 bytes. */ + addi r4, r4, 8 + std r7, 0(r6) + addi r6, r6, 8 +L(copy_bytes): + cmpldi cr1, r5, 4 + cmpldi cr0, r5, 1 + bgt cr1, L(gt4b) /* nb > 4? (5, 6, 7 bytes). */ + ble cr0, L(lt1b) /* nb <= 1? (0, 1 bytes). */ + addi r0, r5, -2 /* 2, 3, 4 bytes. 
 */ + lhz r9, 0(r4) + lhzx r11, r4, r0 /* last 2 bytes; may overlap the first 2. */ + sth r9, 0(r6) + sthx r11, r6, r0 + blr +L(gt4b): + addi r0, r5, -4 /* 5, 6, 7 bytes. */ + lwz r9, 0(r4) + lwzx r11, r4, r0 /* last 4 bytes; may overlap the first 4. */ + stw r9, 0(r6) + stwx r11, r6, r0 + blr +L(lt1b): + mtocrf 0x1, r5 /* nb == 0 ? return. */ + bflr 31 + lbz r0, 0(r4) /* nb == 1. */ + stb r0, 0(r6) + blr + +L(src_naligned): + rlwinm. r0, r4, 0, 29, 31 /* r0 = src & 7. */ + beq 0, L(copy_salign8) /* src is 8-byte aligned. */ +L(copy_snalign): + lvx v0, 0, r4 /* load MSQ. */ + lvsl v18, 0, r4 /* set permute control vector. */ + lvx v19, r7, r4 /* load LSQ. */ + vperm v14, v0, v19, v18 /* align the data. */ + lvx v0, r7, r4 /* load MSQ. */ + lvsl v18, r7, r4 /* set permute control vector. */ + lvx v19, r8, r4 /* load LSQ. */ + vperm v15, v0, v19, v18 /* align the data. */ + lvx v0, r8, r4 /* load MSQ. */ + lvsl v18, r8, r4 /* set permute control vector. */ + lvx v19, r9, r4 /* load LSQ. */ + vperm v16, v0, v19, v18 /* align the data. */ + lvx v0, r9, r4 /* load MSQ. */ + lvsl v18, r9, r4 /* set permute control vector. */ + addi r4, r4, 64 + lvx v19, 0, r4 /* load LSQ. */ + vperm v17, v0, v19, v18 /* align the data. 
 */ + stvx v14, 0, r6 + stvx v15, r7, r6 + stvx v16, r8, r6 + stvx v17, r9, r6 + addi r6, r6, 64 + bdnz L(copy_snalign) + b L(copy_remaining) /* finish the tail (fewer than 64 bytes). */ + +L(copy_salign8): + ld r0, 0(r4) /* copy 64 bytes through GPRs, 8 bytes at a time. */ + ld r7, 8(r4) + ld r8, 16(r4) + ld r9, 24(r4) + std r0, 0(r6) + std r7, 8(r6) + std r8, 16(r6) + std r9, 24(r6) + ld r0, 32(r4) + ld r7, 40(r4) + ld r8, 48(r4) + ld r9, 56(r4) + addi r4, r4, 64 + std r0, 32(r6) + std r7, 40(r6) + std r8, 48(r6) + std r9, 56(r6) + addi r6, r6, 64 + bdnz L(copy_salign8) + b L(copy_remaining) + +END_GEN_TB (memcpy,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c --- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 2015-08-29 15:42:09.771408290 -0500 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 2015-08-29 15:45:37.045421842 -0500 @@ -60,6 +60,10 @@ __memcpy_cell) IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_POWER4, __memcpy_power4) + IFUNC_IMPL_ADD (array, i, memcpy, + (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500) + && (hwcap2 & PPC_FEATURE2_HAS_ISEL)), + __memcpy_e6500) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) /* Support sysdeps/powerpc/powerpc64/multiarch/memmove.c. 
*/ diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/Makefile glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/Makefile --- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/Makefile 2015-08-29 15:42:09.771408290 -0500 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/Makefile 2015-08-29 15:47:51.985430863 -0500 @@ -1,7 +1,7 @@ ifeq ($(subdir),string) sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ - memcpy-power4 memcpy-ppc64 memcmp-power7 memcmp-power4 \ - memcmp-e6500 memcmp-ppc64 \ + memcpy-power4 memcpy-e6500 memcpy-ppc64 memcmp-power7 \ + memcmp-power4 memcmp-e6500 memcmp-ppc64 \ memset-power7 memset-power6 memset-power4 \ memset-ppc64 bzero-power4 bzero-power6 bzero-power7 \ mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy.c glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy.c --- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy.c 2015-08-29 15:41:52.354407558 -0500 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy.c 2015-08-29 15:45:37.045421842 -0500 @@ -30,6 +30,7 @@ extern __typeof (__redirect_memcpy) __libc_memcpy; extern __typeof (__redirect_memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_e6500 attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_power4 attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden; @@ -46,7 +47,10 @@ (hwcap & PPC_FEATURE_CELL_BE) ? __memcpy_cell : (hwcap & PPC_FEATURE_POWER4) - ? __memcpy_power4 + ? __memcpy_power4 : + (((hwcap & PPC_FEATURE_E6500) == PPC_FEATURE_E6500) + && (hwcap2 & PPC_FEATURE2_HAS_ISEL)) + ? 
__memcpy_e6500 : __memcpy_ppc); #undef memcpy diff -Naur glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S --- glibc-2.20/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S 1969-12-31 18:00:00.000000000 -0600 +++ glibc-2.20-e6500-mcpy/sysdeps/powerpc/powerpc64/multiarch/memcpy-e6500.S 2015-08-29 15:45:37.045421842 -0500 @@ -0,0 +1,40 @@ +/* Optimized memcpy implementation for PowerPC64/e6500. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__memcpy_e6500) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__memcpy_e6500): \ + cfi_startproc; \ + LOCALENTRY(__memcpy_e6500) + +#undef END_GEN_TB +#define END_GEN_TB(name, mask) \ + cfi_endproc; \ + TRACEBACK_MASK(__memcpy_e6500,mask) \ + END_2(__memcpy_e6500) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/e6500/memcpy.S> The patch was generated on top of glibc v2.20 source base. The patch was tested with dejaGNU and glibc testsuite. There were no regressions. The benchsuite (both 32-bit and 64-bit) results are attached for your reference. 
Please let me know your comments. Regards, Rohit
Attachment:
benchtest-e6500-64bit-memcpy.txt
Description: benchtest-e6500-64bit-memcpy.txt
Attachment:
benchtest-e6500-32bit-memcpy.txt
Description: benchtest-e6500-32bit-memcpy.txt
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |