This is the mail archive of the
newlib@sourceware.org
mailing list for the newlib project.
[patch] libc/machine/m68k: Incorporate memcpy and memset.
- From: Kazu Hirata <kazu at codesourcery dot com>
- To: newlib at sources dot redhat dot com
- Date: Thu, 26 Apr 2007 21:48:09 -0700
- Subject: [patch] libc/machine/m68k: Incorporate memcpy and memset.
Hi,
Attached is a patch to incorporate memcpy and memset optimized for
m68k.
Tested on fido-none-elf and m68k-elf configured with --with-arch=cf.
OK to apply?
Kazu Hirata
2007-04-27 Kazu Hirata <kazu@codesourcery.com>
* libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S
and memset.S.
* libc/machine/m68k/Makefile.in: Regenerate.
* libc/machine/m68k/memcpy.S, libc/machine/m68k/memset.S: New.
Index: newlib/libc/machine/m68k/Makefile.am
===================================================================
RCS file: /cvs/src/src/newlib/libc/machine/m68k/Makefile.am,v
retrieving revision 1.3
diff -u -d -p -r1.3 Makefile.am
--- newlib/libc/machine/m68k/Makefile.am 1 May 2006 22:01:01 -0000 1.3
+++ newlib/libc/machine/m68k/Makefile.am 23 Apr 2007 16:51:16 -0000
@@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES)
noinst_LIBRARIES = lib.a
-lib_a_SOURCES = setjmp.S strcpy.c strlen.c
+lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
lib_a_CCASFLAGS=$(AM_CCASFLAGS)
lib_a_CFLAGS=$(AM_CFLAGS)
Index: newlib/libc/machine/m68k/Makefile.in
===================================================================
RCS file: /cvs/src/src/newlib/libc/machine/m68k/Makefile.in,v
retrieving revision 1.11
diff -u -d -p -r1.11 Makefile.in
--- newlib/libc/machine/m68k/Makefile.in 18 Dec 2006 20:32:45 -0000 1.11
+++ newlib/libc/machine/m68k/Makefile.in 23 Apr 2007 16:51:16 -0000
@@ -56,7 +56,8 @@ ARFLAGS = cru
lib_a_AR = $(AR) $(ARFLAGS)
lib_a_LIBADD =
am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-strcpy.$(OBJEXT) \
- lib_a-strlen.$(OBJEXT)
+ lib_a-strlen.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \
+ lib_a-memset.$(OBJEXT)
lib_a_OBJECTS = $(am_lib_a_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir)
depcomp =
@@ -181,7 +182,7 @@ AUTOMAKE_OPTIONS = cygnus
INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
AM_CCASFLAGS = $(INCLUDES)
noinst_LIBRARIES = lib.a
-lib_a_SOURCES = setjmp.S strcpy.c strlen.c
+lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
lib_a_CCASFLAGS = $(AM_CCASFLAGS)
lib_a_CFLAGS = $(AM_CFLAGS)
ACLOCAL_AMFLAGS = -I ../../..
@@ -249,6 +250,18 @@ lib_a-setjmp.o: setjmp.S
lib_a-setjmp.obj: setjmp.S
$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-setjmp.obj `if test -f 'setjmp.S'; then $(CYGPATH_W) 'setjmp.S'; else $(CYGPATH_W) '$(srcdir)/setjmp.S'; fi`
+lib_a-memcpy.o: memcpy.S
+ $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.o `test -f 'memcpy.S' || echo '$(srcdir)/'`memcpy.S
+
+lib_a-memcpy.obj: memcpy.S
+ $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.obj `if test -f 'memcpy.S'; then $(CYGPATH_W) 'memcpy.S'; else $(CYGPATH_W) '$(srcdir)/memcpy.S'; fi`
+
+lib_a-memset.o: memset.S
+ $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.o `test -f 'memset.S' || echo '$(srcdir)/'`memset.S
+
+lib_a-memset.obj: memset.S
+ $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.obj `if test -f 'memset.S'; then $(CYGPATH_W) 'memset.S'; else $(CYGPATH_W) '$(srcdir)/memset.S'; fi`
+
.c.o:
$(COMPILE) -c $<
Index: newlib/libc/machine/m68k/memcpy.S
===================================================================
RCS file: newlib/libc/machine/m68k/memcpy.S
diff -N newlib/libc/machine/m68k/memcpy.S
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ newlib/libc/machine/m68k/memcpy.S 23 Apr 2007 16:51:16 -0000
@@ -0,0 +1,124 @@
+/* a-memcpy.s -- memcpy, optimised for m68k asm
+ *
+ * Copyright (c) 2007 mocom software GmbH & Co KG)
+ *
+ * The authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.
+ */
+
+/* CPUs on which word and long accesses at odd addresses do not fault.  */
+#if defined (__mcoldfire__) || defined (__mc68020__) \
+    || defined (__mc68030__) || defined (__mc68040__) \
+    || defined (__mc68060__)
+# define MISALIGNED_OK 1
+#else
+# define MISALIGNED_OK 0
+#endif
+
+	.text
+	.align	4
+
+	.globl	memcpy
+	.type	memcpy, @function
+
+/* void *memcpy (void *dest, const void *src, size_t len)
+ *
+ * In:   4(%sp) = dest, 8(%sp) = src, 12(%sp) = len
+ * Out:  %d0 = dest
+ * Uses: %d0, %d1, %a0, %a1 (nothing is saved or restored)
+ *
+ * strategy:
+ * - no argument testing (the original memcpy from the GNU lib does
+ *   no checking either)
+ * - make sure the destination pointer (the write pointer) is long word
+ *   aligned.  This is the best you can do, because writing to unaligned
+ *   addresses can be the most costly thing you could do.
+ * - once dest is aligned, copy the 4- and 8-byte leftovers of the long
+ *   word part first, then 16 bytes per loop iteration (a little loop
+ *   unrolling to further improve speed), then the last 0..3 bytes.
+ * - on CPUs that fault on misaligned accesses, fall back to a plain
+ *   byte copy when dest and src differ in parity.
+ * - dbra decrements only the low 16 bits of its counter, so every dbra
+ *   loop here borrows from the high word by hand; without that, copies
+ *   of 1 MiB and more would stop early.
+ */
+
+memcpy:
+	move.l	4(%sp),%a0		| a0 = dest (write pointer)
+	move.l	8(%sp),%a1		| a1 = src (read pointer)
+	move.l	12(%sp),%d1		| d1 = len
+	cmp.l	#8,%d1			| if fewer than 8 bytes to transfer,
+	blo	.Lresidue		| do not optimise
+
+#if !MISALIGNED_OK
+	/* Only dest gets aligned below, so src ends up on an even address
+	   exactly when dest and src have the same parity.  If they differ,
+	   word and long moves would hit an odd src address: copy bytewise.  */
+	move.l	%a0,%d0
+	add.l	%a1,%d0			| bit 0 of the sum = parity difference
+	btst	#0,%d0
+	bne	.Lresidue		| d1 still holds the full length
+#endif
+
+	/* align dest: d0 = (-dest) & 3 = bytes up to the next long boundary */
+	move.l	%a0,%d0			| copy of dest
+	neg.l	%d0
+	and.l	#3,%d0			| look for the lower two bits only
+	beq	.Laligned		| dest is already long aligned
+	sub.l	%d0,%d1			| alignment bytes come off the length
+	lsr.l	#1,%d0			| bit 0 set: one byte to reach a word
+	bcc	.Lalign_long
+	move.b	(%a1)+,(%a0)+
+.Lalign_long:
+	lsr.l	#1,%d0			| bit 1 set: one word to reach a long
+	bcc	.Laligned
+	move.w	(%a1)+,(%a0)+
+.Laligned:
+
+	/* long word transfers */
+	move.l	%d1,%d0
+	and.l	#3,%d1			| d1 = byte residue (0..3)
+	lsr.l	#3,%d0			| d0 = len / 8, carry = bit 2 of len
+	bcc	.Lskip4			| carry set for 4-byte residue
+	move.l	(%a1)+,(%a0)+
+.Lskip4:
+	lsr.l	#1,%d0			| d0 = number of 16-byte transfers
+	bcc	.Lcopy			| carry set for 8-byte residue
+	bra	.Lcopy8
+
+.Lcopy16:
+	move.l	(%a1)+,(%a0)+
+	move.l	(%a1)+,(%a0)+
+.Lcopy8:
+	move.l	(%a1)+,(%a0)+
+	move.l	(%a1)+,(%a0)+
+.Lcopy:
+#if !defined (__mcoldfire__)
+	dbra	%d0,.Lcopy16		| counts the low word of d0 only
+	sub.l	#0x10000,%d0		| low word wrapped: borrow from high word
+#else
+	subq.l	#1,%d0
+#endif
+	bpl	.Lcopy16		| d0 >= 0: more 16-byte blocks to copy
+	bra	.Lresidue
+
+.Lbyte:
+	move.b	(%a1)+,(%a0)+		| move residue bytes
+
+.Lresidue:
+#if !defined (__mcoldfire__)
+	dbra	%d1,.Lbyte		| counts the low word of d1 only
+	sub.l	#0x10000,%d1		| low word wrapped: borrow from high word
+#else
+	subq.l	#1,%d1
+#endif
+	bpl	.Lbyte			| d1 >= 0: more bytes to copy
+	move.l	4(%sp),%d0		| return value: the original dest
+	rts
Index: newlib/libc/machine/m68k/memset.S
===================================================================
RCS file: newlib/libc/machine/m68k/memset.S
diff -N newlib/libc/machine/m68k/memset.S
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ newlib/libc/machine/m68k/memset.S 23 Apr 2007 16:51:16 -0000
@@ -0,0 +1,106 @@
+/* a-memset.s -- memset, optimised for fido asm
+ *
+ * Copyright (c) 2007 mocom software GmbH & Co KG)
+ *
+ * The authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.
+ */
+
+	.text
+	.align	4
+
+	.globl	memset
+	.type	memset, @function
+
+| void *memset (void *dest, int value, size_t len)
+|
+| In:   4(%sp) = dest, 8(%sp) = value (low byte is used), 12(%sp) = len
+| Out:  %d0 = dest
+| Uses: %d0, %d1, %a0; %d2 is saved on the stack and restored
+|
+| strategy:
+| - no argument testing (the original memset from the GNU lib does
+|   no checking either)
+| - make sure the destination pointer (the write pointer) is long word
+|   aligned. This is the best you can do, because writing to unaligned
+|   addresses can be the most costly thing one could do.
+| - we fill long word wise if possible
+|
+| VG, 2006
+|
+| bugfixes:
+| - distribution of byte value improved - in cases someone gives
+|   non-byte value
+| - residue byte transfer was not working
+|
+| VG, April 2007
+|
+| - the long fill loop borrows from the high word of its counter after
+|   dbra, which only counts 16 bits; fills larger than 256 KiB used to
+|   stop early
+|
+memset:
+	move.l	4(%sp),%a0		| a0 = dest (write pointer)
+	move.l	8(%sp),%d0		| d0 = value
+	move.l	12(%sp),%d1		| d1 = len
+	cmp.l	#16,%d1
+	blo	.Lbset			| below 16 bytes: byte fills only
+	|
+	move.l	%d2,-(%sp)		| need a scratch register
+	move.b	%d0,%d2			| distribute low byte to all bytes of d0
+	lsl.l	#8,%d0
+	move.b	%d2,%d0			| low word of d0 = vv vv
+	move.w	%d0,%d2
+	swap	%d0			| rotate 16
+	move.w	%d2,%d0			| d0 = vv vv vv vv
+	|
+	move.l	%a0,%d2			| copy of dest
+	neg.l	%d2			| 1 2 3 ==> 3 2 1
+	and.l	#3,%d2			| d2 = bytes up to the next long boundary
+	beq	.Laligned		| is aligned
+	|
+	sub.l	%d2,%d1			| fix length
+	lsr.l	#1,%d2			| bit 0 set: one byte to reach a word
+	bcc	.Lalign_long
+	move.b	%d0,(%a0)+		| fill byte
+.Lalign_long:
+	lsr.l	#1,%d2			| bit 1 set: one word to reach a long
+	bcc	.Laligned
+	move.w	%d0,(%a0)+		| fill word
+.Laligned:
+	move.l	%d1,%d2			| number of long transfers (at least 3)
+	lsr.l	#2,%d2
+	subq.l	#1,%d2			| the first store below is not counted
+
+.Llfill:
+	move.l	%d0,(%a0)+		| fill long words
+.Llset:
+#if !defined (__mcoldfire__)
+	dbra	%d2,.Llfill		| counts the low word of d2 only
+	sub.l	#0x10000,%d2		| low word wrapped: borrow from high word
+#else
+	subq.l	#1,%d2
+#endif
+	bpl	.Llfill			| d2 >= 0: more long words to fill
+	and.l	#3,%d1			| residue byte transfers, fixed
+	move.l	(%sp)+,%d2		| restore d2
+	bra	.Lbset
+
+.Lbfill:
+	move.b	%d0,(%a0)+		| fill residue bytes
+.Lbset:
+#if !defined (__mcoldfire__)
+	dbra	%d1,.Lbfill		| d1 is below 16 here, 16 bits suffice
+#else
+	subq.l	#1,%d1
+	bpl	.Lbfill
+#endif
+	move.l	4(%sp),%d0		| return value: the original dest
+	rts