[patch] Fix and reenable memcpy/memset for m68k

Eric Norum norume@aps.anl.gov
Mon Apr 30 21:29:00 GMT 2007


On Apr 30, 2007, at 3:39 PM, Joel Sherrill wrote:

> Kazu,
>
> Just to finish this one completely off.  I want to let you know  
> that I pinged Eric Norum
> to review this because he was responsible for the m68k specific  
> memcpy RTEMS uses.
> Where possible we prefer to use newlib code instead of overriding  
> it with our own
> version.   We are inclined to switch to your implementation but I  
> would
> appreciate you reviewing to the attached memcpy.c to see if Eric  
> had any
> optimization ideas that aren't also in yours.
>
> FYI parts of the RTEMS build structure can distinguish CPU models  
> at a finer
> granularity than  multilib allows.  The attached memcpy.c has some  
> optimizations
> to account for the CPU32+.
> Thanks.

The optimization in question is to put the CPU32+ (and 68010 if there  
are still any of those in the wild) into 'loop mode'.  This is a  
special case of a single two-byte instruction followed by a dbra.    
This allows the CPU to repeatedly execute the two instructions  
without any instruction fetch cycles.  Thus, the assembly code from  
Kazu Hirata actually incurs a performance penalty by unrolling the  
loops.
Whether or not this optimization is worthwhile adding is left as an  
argument for others to decide.   Are CPU32+ architecture machines  
even available any more?

>
> --joel
>
> Kazu Hirata wrote:
>> Hi,
>>
>> Attached is a patch to fix and reenable memcpy/memset as suggested by
>> Eric.  (Thanks Eric for pointing our the bug!)
>>
>> Tested on fido-elf.  OK to apply?
>>
>> Kazu Hirata
>>
>> 2007-04-30  Kazu Hirata  <kazu@codesourcery.com>
>>
>> 	* libc/machine/m68k/Makefile.am (lib_a_SOURCES): Add memcpy.S
>> 	and memset.S.
>> 	* libc/machine/m68k/Makefile.in: Regenerate.
>> 	* libc/machine/m68k/memcpy.S: Use sub.l followed by dbra.
>> 	* libc/machine/m68k/memset.S: Likewise.
>>
>> Index: newlib/libc/machine/m68k/Makefile.am
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/machine/m68k/Makefile.am,v
>> retrieving revision 1.5
>> diff -u -d -p -r1.5 Makefile.am
>> --- newlib/libc/machine/m68k/Makefile.am	27 Apr 2007 22:10:47  
>> -0000	1.5
>> +++ newlib/libc/machine/m68k/Makefile.am	30 Apr 2007 19:59:36 -0000
>> @@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES)
>>   noinst_LIBRARIES = lib.a
>>  -lib_a_SOURCES = setjmp.S strcpy.c strlen.c
>> +lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
>>  lib_a_CCASFLAGS=$(AM_CCASFLAGS)
>>  lib_a_CFLAGS=$(AM_CFLAGS)
>>  Index: newlib/libc/machine/m68k/Makefile.in
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/machine/m68k/Makefile.in,v
>> retrieving revision 1.13
>> diff -u -d -p -r1.13 Makefile.in
>> --- newlib/libc/machine/m68k/Makefile.in	27 Apr 2007 22:10:47  
>> -0000	1.13
>> +++ newlib/libc/machine/m68k/Makefile.in	30 Apr 2007 19:59:36 -0000
>> @@ -56,7 +56,8 @@ ARFLAGS = cru
>>  lib_a_AR = $(AR) $(ARFLAGS)
>>  lib_a_LIBADD =
>>  am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-strcpy.$(OBJEXT) \
>> -	lib_a-strlen.$(OBJEXT)
>> +	lib_a-strlen.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \
>> +	lib_a-memset.$(OBJEXT)
>>  lib_a_OBJECTS = $(am_lib_a_OBJECTS)
>>  DEFAULT_INCLUDES = -I. -I$(srcdir)
>>  depcomp =
>> @@ -181,7 +182,7 @@ AUTOMAKE_OPTIONS = cygnus
>>  INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
>>  AM_CCASFLAGS = $(INCLUDES)
>>  noinst_LIBRARIES = lib.a
>> -lib_a_SOURCES = setjmp.S strcpy.c strlen.c
>> +lib_a_SOURCES = setjmp.S strcpy.c strlen.c memcpy.S memset.S
>>  lib_a_CCASFLAGS = $(AM_CCASFLAGS)
>>  lib_a_CFLAGS = $(AM_CFLAGS)
>>  ACLOCAL_AMFLAGS = -I ../../..
>> @@ -249,6 +250,18 @@ lib_a-setjmp.o: setjmp.S
>>  lib_a-setjmp.obj: setjmp.S
>>  	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-setjmp.obj  
>> `if test -f 'setjmp.S'; then $(CYGPATH_W) 'setjmp.S'; else $ 
>> (CYGPATH_W) '$(srcdir)/setjmp.S'; fi`
>>  +lib_a-memcpy.o: memcpy.S
>> +	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.o  
>> `test -f 'memcpy.S' || echo '$(srcdir)/'`memcpy.S
>> +
>> +lib_a-memcpy.obj: memcpy.S
>> +	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.obj  
>> `if test -f 'memcpy.S'; then $(CYGPATH_W) 'memcpy.S'; else $ 
>> (CYGPATH_W) '$(srcdir)/memcpy.S'; fi`
>> +
>> +lib_a-memset.o: memset.S
>> +	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.o  
>> `test -f 'memset.S' || echo '$(srcdir)/'`memset.S
>> +
>> +lib_a-memset.obj: memset.S
>> +	$(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.obj  
>> `if test -f 'memset.S'; then $(CYGPATH_W) 'memset.S'; else $ 
>> (CYGPATH_W) '$(srcdir)/memset.S'; fi`
>> +
>>  .c.o:
>>  	$(COMPILE) -c $<
>>  Index: newlib/libc/machine/m68k/memcpy.S
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/machine/m68k/memcpy.S,v
>> retrieving revision 1.1
>> diff -u -d -p -r1.1 memcpy.S
>> --- newlib/libc/machine/m68k/memcpy.S	27 Apr 2007 16:05:35 -0000	1.1
>> +++ newlib/libc/machine/m68k/memcpy.S	30 Apr 2007 19:59:36 -0000
>> @@ -73,10 +73,11 @@ memcpy:
>>  .Lcopy:
>>  #if !defined (__mcoldfire__)
>>  	dbra	%d0,1b
>> +	sub.l	#0x10000,%d0
>>  #else
>>  	subq.l	#1,%d0
>> -	bpl	1b
>>  #endif
>> +	bpl	1b
>>  	bra	.Lresidue
>>   1:
>> Index: newlib/libc/machine/m68k/memset.S
>> ===================================================================
>> RCS file: /cvs/src/src/newlib/libc/machine/m68k/memset.S,v
>> retrieving revision 1.1
>> diff -u -d -p -r1.1 memset.S
>> --- newlib/libc/machine/m68k/memset.S	27 Apr 2007 16:05:35 -0000	1.1
>> +++ newlib/libc/machine/m68k/memset.S	30 Apr 2007 19:59:36 -0000
>> @@ -76,10 +76,11 @@ memset:
>>  .Llset:
>>  #if !defined (__mcoldfire__)
>>  	dbra	%d2,1b		| loop until done
>> +	sub.l	#0x10000,%d2
>>  #else
>>  	subq.l	#1,%d2
>> -	bpl	1b
>>  #endif
>> +	bpl	1b
>>  	and.l	#3,%d1		| residue byte transfers, fixed
>>  	move.l	(%sp)+,%d2	| restore d2
>>  	bra	.Lbset
>>
>
> /*
>  *  C library memcpy routine
>  *
>  *  This routine has code to optimize performance on the CPU32+
>  *  and another version for other 68k machines.
>  *
>  *  It could be optimized more for machines with MOVE16 instructions.
>  *
>  *  The routine is placed in this source directory to ensure that it
>  *  is picked up by all applications.
>  *
>  *  W. Eric Norum
>  *  Saskatchewan Accelerator Laboratory
>  *  University of Saskatchewan
>  *  Saskatoon, Saskatchewan, CANADA
>  *  eric@skatter.usask.ca
>  */
>
> #include <string.h>
> #include <rtems/score/m68k.h>
>
> #if defined(__mcpu32__)
> #define COPYSETUP(n)	n--
> #define COPY(to,from,n,size) \
> 	asm volatile ("1:\n" \
> 	     "\tmove." size " (%0)+,(%1)+\n" \
> 	     "\tdbf %2,1b\n"  \
> 	     "\tsub.l #0x10000,%2\n" \
> 	     "\tbpl.b 1b\n" : \
> 		"=a" (from), "=a" (to), "=d" (n) :\
> 		 "0" (from),  "1" (to), "2" (n) : \
> 		 "cc", "memory")
> #else
> #define COPYSETUP(n)
> #define COPY(to,from,n,size) \
> 	asm volatile ("1:\n" \
> 	     "\tmove." size " (%0)+,(%1)+\n" \
> 	     "\tsubq.l #1,%2\n\tbne.b 1b\n" : \
> 		"=a" (from), "=a" (to), "=d" (n) :\
> 		 "0" (from),  "1" (to), "2" (n) : \
> 		 "cc", "memory")
> #endif
>
> /* gcc doesn't know that cpu32+ is better than cpu32 :( */
> #if defined(__mcpu32p__)
> #undef M68k_HAS_MISALIGNED
> #define M68k_HAS_MISALIGNED 1
> #endif
>
> void *
> memcpy(void *s1, const void *s2, size_t n)
> {
> 	char *p1 = s1;
> 	const char *p2 = s2;
>
> 	if (n) {
> 		if (n < 16) {
> 			COPYSETUP (n);
> 			COPY (p1, p2, n, "b");
> 		}
> 		else {
> 			int nbyte;
> 			int nl;
> 			nbyte = (int)p1 & 0x3;
> 			if (nbyte) {
> 				nbyte = 4 - nbyte;
> 				n -= nbyte;
> 				COPYSETUP (nbyte);
> 				COPY (p1, p2, nbyte, "b");
> 			}
> #if (M68K_HAS_MISALIGNED == 0)
> 			/*
> 			 * Take care of machines that can't
> 			 * handle misaligned references.
> 			 */
> 			if ((int)p2 & 0x1) {
> 				COPYSETUP (n);
> 				COPY (p1, p2, n, "b");
> 				return s1;
> 			}
> #endif
> 			nl = (unsigned int)n >> 2;
> 			COPYSETUP (nl);
> 			COPY (p1, p2, nl, "l");
> 			nbyte = (int)n & 0x3;
> 			if (nbyte) {
> 				COPYSETUP (nbyte);
> 				COPY (p1, p2, nbyte, "b");
> 			}
> 		}
> 	}
> 	return s1;
> }

-- 
Eric Norum <norume@aps.anl.gov>
Advanced Photon Source
Argonne National Laboratory
(630) 252-4793




More information about the Newlib mailing list