MIPS specific string and memory functions

Michael Meissner meissner@redhat.com
Mon Mar 11 07:45:00 GMT 2002


I have checked in the following MIPS specific string and memory
functions.  I had previously submitted just the string functions, and
Jeff asked me to write tests to make sure all of the alignments are
tested.  I did so, adding the tests to the GCC c-torture test (there
didn't seem to be a newlib specific testsuite when I checked out a
newlib tree).

2002-03-11  Michael Meissner  <meissner@redhat.com>

	* libc/machine/mips/Makefile.am (lib_a_SOURCES): Add Mips specific
	variants strlen.c, strcmp.c, strncpy.c, memset.c and memcpy.c.
	* libc/machine/mips/Makefile.in: Regenerate.
	* libc/machine/mips/memcpy.c: New file, optimized for MIPS.
	* libc/machine/mips/memset.c: Ditto.
	* libc/machine/mips/strcmp.c: Ditto.
	* libc/machine/mips/strlen.c: Ditto.
	* libc/machine/mips/strncpy.c: Ditto.

*** newlib/libc/machine/mips/Makefile.am.~1~	Thu Feb 17 14:39:47 2000
--- newlib/libc/machine/mips/Makefile.am	Mon Mar 11 10:31:55 2002
*************** INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLA
*** 6,12 ****
  
  noinst_LIBRARIES = lib.a
  
! lib_a_SOURCES = setjmp.S
  
  ACLOCAL_AMFLAGS = -I ../../..
  CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
--- 6,12 ----
  
  noinst_LIBRARIES = lib.a
  
! lib_a_SOURCES = setjmp.S strlen.c strcmp.c strncpy.c memset.c memcpy.c
  
  ACLOCAL_AMFLAGS = -I ../../..
  CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
*** newlib/libc/machine/mips/Makefile.in.~1~	Mon Mar 11 10:31:33 2002
--- newlib/libc/machine/mips/Makefile.in	Mon Mar 11 10:32:07 2002
*************** INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLA
*** 84,90 ****
  
  noinst_LIBRARIES = lib.a
  
! lib_a_SOURCES = setjmp.S
  
  ACLOCAL_AMFLAGS = -I ../../..
  CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
--- 84,90 ----
  
  noinst_LIBRARIES = lib.a
  
! lib_a_SOURCES = setjmp.S strlen.c strcmp.c strncpy.c memset.c memcpy.c
  
  ACLOCAL_AMFLAGS = -I ../../..
  CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
*************** DEFS = @DEFS@ -I. -I$(srcdir) 
*** 98,104 ****
  CPPFLAGS = @CPPFLAGS@
  LIBS = @LIBS@
  lib_a_LIBADD = 
! lib_a_OBJECTS =  setjmp.o
  CFLAGS = @CFLAGS@
  COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
  CCLD = $(CC)
--- 98,104 ----
  CPPFLAGS = @CPPFLAGS@
  LIBS = @LIBS@
  lib_a_LIBADD = 
! lib_a_OBJECTS =  setjmp.o strlen.o strcmp.o strncpy.o memset.o memcpy.o
  CFLAGS = @CFLAGS@
  COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
  CCLD = $(CC)
*** newlib/libc/machine/mips/strlen.c.~1~	Mon Mar 11 10:32:54 2002
--- newlib/libc/machine/mips/strlen.c	Mon Mar 11 10:33:13 2002
***************
*** 0 ****
--- 1,71 ----
+ /*
+  * strlen.c -- strlen function.  On at least some MIPS chips, a simple
+  * strlen is faster than the 'optimized' C version.
+  *
+  * Copyright (c) 2001 Red Hat, Inc.
+  *
+  * The authors hereby grant permission to use, copy, modify, distribute,
+  * and license this software and its documentation for any purpose, provided
+  * that existing copyright notices are retained in all copies and that this
+  * notice is included verbatim in any distributions. No written agreement,
+  * license, or royalty fee is required for any of the authorized uses.
+  * Modifications to this software may be copyrighted by their authors
+  * and need not follow the licensing terms described here, provided that
+  * the new terms are clearly indicated on the first page of each file where
+  * they apply.
+  */
+ 
+ #include <stddef.h>
+ #include <string.h>
+ 
+ /* MIPS16 needs to come first.  */
+ 
+ #if defined(__mips16)
+ /* Return the number of characters in STR, not counting the terminating
+    null byte.  Plain C version, used when compiling MIPS16 code.  */
+ size_t
+ strlen (const char *str)
+ {
+   const char *start = str;
+ 
+   while (*str++ != '\0')
+     ;
+ 
+   /* The post-increment leaves STR one past the null byte, so subtract one
+      to keep the terminator out of the length.  */
+   return str - start - 1;
+ }
+ #elif defined(__mips64)
+ /* Hand-written strlen for 64-bit MIPS.  $4 is used as the string pointer,
+    $3 as a scratch byte and $2 receives the result; control returns through
+    $31.  Because of ".set noreorder" the instruction written after each
+    branch/jump occupies its delay slot and is executed along with it.  */
+ __asm__(""			/* 64-bit MIPS targets */
+ 	"	.set	noreorder\n"
+ 	"	.set	nomacro\n"
+ 	"	.globl	strlen\n"
+ 	"	.ent	strlen\n"
+ 	"strlen:\n"
+ 	"	daddiu	$2,$4,1\n"	/* $2 = str + 1 */
+ 	"\n"
+ 	"1:	lbu	$3,0($4)\n"	/* $3 = current byte */
+ 	"	bnez	$3,1b\n"	/* loop until the null byte is loaded */
+ 	"	daddiu	$4,$4,1\n"	/* delay slot: advance pointer every pass */
+ 	"\n"
+ 	/* On exit $4 is one past the null byte, so $4 - $2 is
+ 	   (null + 1) - (str + 1), i.e. the string length.  */
+ 	"	jr	$31\n"
+ 	"	dsubu	$2,$4,$2\n"	/* delay slot: compute the result */
+ 	"	.end	strlen\n"
+ 	"	.set	macro\n"
+ 	"	.set	reorder\n");
+ 
+ #else
+ __asm__(""			/* 32-bit MIPS targets */
+ 	"	.set	noreorder\n"
+ 	"	.set	nomacro\n"
+ 	"	.globl	strlen\n"
+ 	"	.ent	strlen\n"
+ 	"strlen:\n"
+ 	"	addiu	$2,$4,1\n"
+ 	"\n"
+ 	"1:	lbu	$3,0($4)\n"
+ 	"	bnez	$3,1b\n"
+ 	"	addiu	$4,$4,1\n"
+ 	"\n"
+ 	"	jr	$31\n"
+ 	"	subu	$2,$4,$2\n"
+ 	"	.end	strlen\n"
+ 	"	.set	macro\n"
+ 	"	.set	reorder\n");
+ #endif
*** newlib/libc/machine/mips/strcmp.c.~1~	Mon Mar 11 10:32:54 2002
--- newlib/libc/machine/mips/strcmp.c	Mon Mar 11 10:33:27 2002
***************
*** 0 ****
--- 1,71 ----
+ /*
+  * strcmp.c -- strcmp function.  On at least some MIPS chips, a strcmp that is
+  * unrolled twice is faster than the 'optimized' C version in newlib.
+  *
+  * Copyright (c) 2001 Red Hat, Inc.
+  *
+  * The authors hereby grant permission to use, copy, modify, distribute,
+  * and license this software and its documentation for any purpose, provided
+  * that existing copyright notices are retained in all copies and that this
+  * notice is included verbatim in any distributions. No written agreement,
+  * license, or royalty fee is required for any of the authorized uses.
+  * Modifications to this software may be copyrighted by their authors
+  * and need not follow the licensing terms described here, provided that
+  * the new terms are clearly indicated on the first page of each file where
+  * they apply.  */
+ 
+ #include <stddef.h>
+ #include <string.h>
+ #include <stdlib.h>
+ 
+ /* Compare the null-terminated strings S1 and S2, reading their bytes as
+    unsigned char.  Returns zero when the strings are equal, otherwise the
+    difference between the first pair of bytes that differ.  */
+ int
+ strcmp (const char *s1, const char *s2)
+ { 
+   unsigned const char *us1 = (unsigned const char *)s1;
+   unsigned const char *us2 = (unsigned const char *)s2;
+   int c1a, c1b;		/* s1/s2 byte: first of a pair, or current byte */
+   int c2a, c2b;		/* s1/s2 byte: second of a pair */
+ 
+   /* If the pointers aren't both aligned to a 2-byte (16-bit) boundary, do
+      the comparison byte by byte, so that we don't get an invalid page fault
+      if we are comparing a string whose null byte is at the last byte on the
+      last valid page.  */
+   if (((((long)us1) | ((long)us2)) & 1) == 0)
+     {
+       /* Loop unrolled twice.  Each load is issued ahead of the test that
+ 	 consumes it (presumably to hide load latency), so the statement
+ 	 order below matters.  A byte is only loaded once the preceding
+ 	 byte of the same string is known to be non-null.  */
+       c1a = *us1;
+       for (;;)
+ 	{
+ 	  c1b = *us2;
+ 	  us1 += 2;
+ 	  if (c1a == '\0')
+ 	    goto ret1;
+ 
+ 	  /* us1 has already been advanced, so us1[-1] is the second byte
+ 	     of the current pair.  */
+ 	  c2a = us1[-1];
+ 	  if (c1a != c1b)
+ 	    goto ret1;
+ 
+ 	  c2b = us2[1];
+ 	  us2 += 2;
+ 	  if (c2a == '\0')
+ 	    break;
+ 
+ 	  /* Preload the first byte of the next pair.  */
+ 	  c1a = *us1;
+ 	  if (c2a != c2b)
+ 	    break;
+ 	}
+ 
+       /* The loop is only left via break after the second byte of a pair
+ 	 ended the string or differed.  */
+       return c2a - c2b;
+     }
+   else
+     {
+       /* Plain bytewise comparison.  */
+       do
+ 	{
+ 	  c1a = *us1++;
+ 	  c1b = *us2++;
+ 	}
+       while (c1a != '\0' && c1a == c1b);
+     }
+ 
+   /* Reached from the bytewise loop, or when the first byte of a pair ended
+      the string or differed.  */
+  ret1:
+   return c1a - c1b;
+ }
*** newlib/libc/machine/mips/strncpy.c.~1~	Mon Mar 11 10:32:54 2002
--- newlib/libc/machine/mips/strncpy.c	Mon Mar 11 10:35:00 2002
***************
*** 0 ****
--- 1,229 ----
+ /*
+  * strncpy.c -- strncpy function.  On at least some MIPS chips, you get better
+  * code by hand unrolling the loops, and by using store words to zero the
+  * remainder of the buffer than the default newlib C version.
+  *
+  * Copyright (c) 2001 Red Hat, Inc.
+  *
+  * The authors hereby grant permission to use, copy, modify, distribute,
+  * and license this software and its documentation for any purpose, provided
+  * that existing copyright notices are retained in all copies and that this
+  * notice is included verbatim in any distributions. No written agreement,
+  * license, or royalty fee is required for any of the authorized uses.
+  * Modifications to this software may be copyrighted by their authors
+  * and need not follow the licensing terms described here, provided that
+  * the new terms are clearly indicated on the first page of each file where
+  * they apply.  */
+ 
+ #include <string.h>
+ #include <stddef.h>
+ #include <stdlib.h>
+ 
+ #if !defined(__GNUC__) || (__GNUC__ < 3)
+ /* No branch-prediction hints available: just evaluate the condition.  The
+    expansion is parenthesized so it stays a single operand wherever the
+    macro is used.  */
+ #define __builtin_expect(a,b) (a)
+ 
+ #else
+ /* Types used by the optimized strncpy to zero fill with wide stores.  */
+ #ifdef __mips64
+ /* Don't use limits test for the size of long, in order to allow the use of
+    64-bit stores on MIPS3 machines, even if -mlong32 was used.  */
+ typedef unsigned word_type __attribute__ ((mode (DI)));
+ #else
+ typedef unsigned word_type __attribute__ ((mode (SI)));
+ #endif
+ 
+ typedef unsigned si_type __attribute__ ((mode (SI)));
+ typedef unsigned hi_type __attribute__ ((mode (HI)));
+ 
+ /* Number of bytes handled per iteration of the unrolled copy loop, and
+    number of words stored per iteration of the unrolled zero-fill loop.  */
+ #ifndef UNROLL_FACTOR
+ #define UNROLL_FACTOR 4
+ 
+ #elif (UNROLL_FACTOR != 2) && (UNROLL_FACTOR != 4)
+ #error "UNROLL_FACTOR must be 2 or 4"
+ #endif
+ #endif
+ 
+ /* Copy at most COUNT characters of the null-terminated string SRC0 into
+    DST0.  If a null byte is copied before COUNT characters have been
+    written, the remainder of the COUNT bytes is filled with null bytes;
+    otherwise DST0 is left without a terminator.  Returns DST0.  */
+ char *
+ strncpy (char *dst0, const char *src0, size_t count)
+ {
+ #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) || defined(__mips16) || !defined(__GNUC__) || (__GNUC__ < 3)
+   /* Simple byte-at-a-time version.  */
+   char *dst, *end;
+   const char *src;
+   int ch;
+ 
+   dst = dst0;
+   src = src0;
+   end = dst + count;
+   while (dst != end)
+     {
+       *dst++ = ch = *src++;
+       if (__builtin_expect (ch == '\0', 0))
+ 	{
+ 	  /* Source exhausted: pad the rest of the buffer with nulls.  */
+ 	  while (dst != end)
+ 	    *dst++ = '\0';
+ 
+ 	  break;
+ 	}
+     }
+ 
+   return dst0;
+ 
+ #else
+   unsigned char *dst;
+   unsigned char *end;
+   const unsigned char *src;
+   int ch0, ch1;
+ #if UNROLL_FACTOR > 2
+   int ch2, ch3;
+ #endif
+   int ch;
+   int odd_bytes;
+ 
+   dst = (unsigned char *)dst0;
+   src = (unsigned const char *)src0;
+ 
+   /* The unrolled loop below loads UNROLL_FACTOR source bytes before it tests
+      any of them for null, so it can read up to UNROLL_FACTOR-1 bytes beyond
+      the terminator.  Copy single bytes until SRC is aligned to
+      UNROLL_FACTOR, so that every such group of loads lies inside one
+      aligned block and cannot run onto a following (possibly invalid)
+      page.  */
+   while (count != 0 && (((long)src) & (UNROLL_FACTOR - 1)) != 0)
+     {
+       count--;
+       *dst++ = ch = *src++;
+       if (ch == '\0')
+ 	goto zero_fill;
+     }
+ 
+   if (__builtin_expect (count >= 4, 1))
+     {
+       /* Split COUNT into a multiple of UNROLL_FACTOR for the unrolled loop
+ 	 and a few odd bytes handled by the bytewise loop afterwards.  */
+       odd_bytes = (count & (UNROLL_FACTOR - 1));
+       count -= odd_bytes;
+ 
+       do
+ 	{
+ 	  ch0 = src[0];
+ 	  ch1 = src[1];
+ #if UNROLL_FACTOR > 2
+ 	  ch2 = src[2];
+ 	  ch3 = src[3];
+ #endif
+ 	  src += UNROLL_FACTOR;
+ 	  count -= UNROLL_FACTOR;
+ 
+ 	  dst[0] = ch0;
+ 	  if (ch0 == '\0')
+ 	    goto found_null0;
+ 
+ 	  dst[1] = ch1;
+ 	  if (ch1 == '\0')
+ 	    goto found_null1;
+ 
+ #if UNROLL_FACTOR > 2
+ 	  dst[2] = ch2;
+ 	  if (ch2 == '\0')
+ 	    goto found_null2;
+ 
+ 	  dst[3] = ch3;
+ 	  if (ch3 == '\0')
+ 	    goto found_null3;
+ #endif
+ 
+ 	  dst += UNROLL_FACTOR;
+ 	}
+       while (count);
+ 
+       /* fall through, count == 0, no null found, deal with last bytes */
+       count = odd_bytes;
+     }
+ 
+   /* Bytewise copy of whatever the unrolled loop did not handle.  */
+   end = dst + count;
+   while (dst != end)
+     {
+       *dst++ = ch = *src++;
+       if (ch == '\0')
+ 	{
+ 	  while (dst != end)
+ 	    *dst++ = '\0';
+ 
+ 	  break;
+ 	}
+     }
+ 
+   return dst0;
+ 
+   /* The found_null labels are entered from the unrolled loop.  Each one
+      falls through to the next, adding back one byte of COUNT and stepping
+      DST back by one for every byte of the group that follows the null, so
+      that after the final "dst += UNROLL_FACTOR" DST points just past the
+      null and COUNT is the number of bytes still to be zeroed.  */
+ 
+   /* Found null byte in first byte, count has been decremented by 4, null has
+      been stored in dst[0].  */
+  found_null0:
+   count++;			/* add 1 to cover remaining byte */
+   dst -= 1;			/* adjust dst += 4 gets correct ptr */
+   /* fall through */
+ 
+   /* Found null byte in second byte, count has been decremented by 4, null has
+      been stored in dst[1].  */
+  found_null1:
+ #if UNROLL_FACTOR > 2
+   count++;			/* add 1 to cover remaining byte */
+   dst -= 1;			/* adjust dst += 4 gets correct ptr */
+   /* fall through */
+ 
+   /* Found null byte in third byte, count has been decremented by 4, null has
+      been stored in dst[2].  */
+  found_null2:
+   count++;			/* add 1 to cover remaining byte */
+   dst -= 1;			/* adjust dst += 4 gets correct ptr */
+   /* fall through */
+ 
+   /* Found null byte in fourth byte, count is accurate, dst has not been
+      updated yet.  */
+  found_null3:
+ #endif
+   count += odd_bytes;		/* restore odd byte count */
+   dst += UNROLL_FACTOR;
+ 
+   /* Entered with DST pointing at the first byte to clear and COUNT holding
+      the number of bytes left to clear.  */
+  zero_fill:
+   /* Zero fill remainder of the array.  Unroll the loop, and use word/dword
+      stores where we can.  */
+   while (count && (((long)dst) & (sizeof (word_type) - 1)) != 0)
+     {
+       count--;
+       *dst++ = 0;
+     }
+ 
+   /* DST is now word aligned (or COUNT is zero).  */
+   while (count >= UNROLL_FACTOR*sizeof (word_type))
+     {
+       count -= UNROLL_FACTOR*sizeof (word_type);
+       dst += UNROLL_FACTOR*sizeof (word_type);
+ #if UNROLL_FACTOR > 2
+       ((word_type *)(void *)dst)[-4] = 0;
+       ((word_type *)(void *)dst)[-3] = 0;
+ #endif
+       ((word_type *)(void *)dst)[-2] = 0;
+       ((word_type *)(void *)dst)[-1] = 0;
+     }
+ 
+   /* Finish with progressively narrower stores.  */
+ #if UNROLL_FACTOR > 2
+   if (count >= 2*sizeof (word_type))
+     {
+       count -= 2*sizeof (word_type);
+       ((word_type *)(void *)dst)[0] = 0;
+       ((word_type *)(void *)dst)[1] = 0;
+       dst += 2*sizeof (word_type);
+     }
+ #endif
+ 
+   if (count >= sizeof (word_type))
+     {
+       count -= sizeof (word_type);
+       ((word_type *)(void *)dst)[0] = 0;
+       dst += sizeof (word_type);
+     }
+ 
+ #ifdef __mips64
+   if (count >= sizeof (si_type))
+     {
+       count -= sizeof (si_type);
+       ((si_type *)(void *)dst)[0] = 0;
+       dst += sizeof (si_type);
+     }
+ #endif
+ 
+   if (count >= sizeof (hi_type))
+     {
+       count -= sizeof (hi_type);
+       ((hi_type *)(void *)dst)[0] = 0;
+       dst += sizeof (hi_type);
+     }
+ 
+   if (count)
+     *dst = '\0';
+ 
+   return dst0;
+ #endif
+ }
*** newlib/libc/machine/mips/memset.c.~1~	Mon Mar 11 10:32:54 2002
--- newlib/libc/machine/mips/memset.c	Mon Mar 11 10:35:32 2002
***************
*** 0 ****
--- 1,142 ----
+ /*
+ FUNCTION
+ 	<<memset>>---set an area of memory, optimized for the MIPS processors
+ 
+ INDEX
+ 	memset
+ 
+ ANSI_SYNOPSIS
+ 	#include <string.h>
+ 	void *memset(void *<[dst]>, int <[c]>, size_t <[length]>);
+ 
+ TRAD_SYNOPSIS
+ 	#include <string.h>
+ 	void *memset(<[dst]>, <[c]>, <[length]>)
+ 	void *<[dst]>;
+ 	int <[c]>;
+ 	size_t <[length]>;
+ 
+ DESCRIPTION
+ 	This function converts the argument <[c]> into an unsigned
+ 	char and fills the first <[length]> characters of the array
+ 	pointed to by <[dst]> to the value.
+ 
+ RETURNS
+ 	<<memset>> returns the value of <[dst]>.
+ 
+ PORTABILITY
+ <<memset>> is ANSI C.
+ 
+     <<memset>> requires no supporting OS subroutines.
+ 
+ QUICKREF
+ 	memset ansi pure
+ */
+ 
+ #include <string.h>
+ 
+ /* Integer type used for the wide stores in the optimized memset.  */
+ #ifdef __mips64
+ #define wordtype long long
+ #else
+ #define wordtype long
+ #endif
+ 
+ /* Number of bytes written by a single wordtype store.  */
+ #define LBLOCKSIZE     (sizeof(wordtype))
+ /* Nonzero if address X is not a multiple of LBLOCKSIZE.  */
+ #define UNALIGNED(X)   ((long)(X) & (LBLOCKSIZE - 1))
+ /* Nonzero if LEN is below the threshold for the word-at-a-time path.  */
+ #define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE * 4)
+ 
+ /* Fill the first N bytes of the memory at M with the value C, converted to
+    a byte, and return M.  */
+ _PTR
+ _DEFUN (memset, (m, c, n),
+ 	_PTR m _AND
+ 	int c _AND
+ 	size_t n)
+ {
+ #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) || defined(__mips16)
+   /* Simple byte-at-a-time version.  */
+   char *s = (char *) m;
+ 
+   while (n-- != 0)
+     {
+       *s++ = (char) c;
+     }
+ 
+   return m;
+ #else
+   char *s = (char *) m;
+   size_t i;
+   unsigned wordtype buffer;
+   unsigned wordtype *aligned_addr;
+   size_t iter;
+ 
+   if (!TOO_SMALL (n))
+     {
+       /* Offset of S within its word; zero when S is already aligned.  */
+       size_t unaligned = UNALIGNED (s);
+ 
+       /* We know that N is >= LBLOCKSIZE so we can just word
+          align the S without having to check the length. */
+ 
+       if (unaligned)
+ 	{
+ 	  while (unaligned++ < LBLOCKSIZE)
+ 	    *s++ = (char)c, n--;
+ 	}
+ 
+       /* S is now word-aligned so we can process the remainder
+          in word sized chunks except for a few (< LBLOCKSIZE)
+          bytes which might be left over at the end. */
+ 
+       aligned_addr = (unsigned wordtype *)s;
+ 
+       /* Store C into each char sized location in BUFFER so that
+          we can set large blocks quickly.  */
+       c &= 0xff;
+       buffer = c;
+       if (buffer != 0)
+ 	{
+ 	  if (LBLOCKSIZE == 4)
+ 	    {
+ 	       buffer |= (buffer << 8);
+ 	       buffer |= (buffer << 16);
+ 	    }
+ 	  else if (LBLOCKSIZE == 8)
+ 	    {
+ 	      buffer |= (buffer << 8);
+ 	      buffer |= (buffer << 16);
+ 	      /* Shift by 32 in two steps so the expression remains valid
+ 		 when this branch is compiled for a 32-bit BUFFER.  */
+ 	      buffer |= ((buffer << 31) << 1);
+ 	    }
+ 	  else
+ 	    {
+ 	      for (i = 1; i < LBLOCKSIZE; i++)
+ 		buffer = (buffer << 8) | c;
+ 	    }
+         }
+ 
+       /* Store two words per iteration, then at most one more word.  */
+       iter = n / (2*LBLOCKSIZE);
+       n = n % (2*LBLOCKSIZE);
+       while (iter > 0)
+ 	{
+ 	  aligned_addr[0] = buffer;
+ 	  aligned_addr[1] = buffer;
+ 	  aligned_addr += 2;
+ 	  iter--;
+ 	}
+ 
+       if (n >= LBLOCKSIZE)
+ 	{
+ 	  *aligned_addr++ = buffer;
+ 	  n -= LBLOCKSIZE;
+ 	}
+ 
+       /* Pick up the remainder with a bytewise loop.  */
+       s = (char*)aligned_addr;
+     }
+ 
+   /* Short requests and leftover tail bytes are written one at a time.  */
+   while (n > 0)
+     {
+       *s++ = (char)c;
+       n--;
+     }
+ 
+   return m;
+ #endif /* not PREFER_SIZE_OVER_SPEED */
+ }
*** newlib/libc/machine/mips/memcpy.c.~1~	Mon Mar 11 10:32:54 2002
--- newlib/libc/machine/mips/memcpy.c	Mon Mar 11 10:35:14 2002
***************
*** 0 ****
--- 1,164 ----
+ /*
+ FUNCTION
+         <<memcpy>>---copy memory regions, optimized for the mips processors
+ 
+ ANSI_SYNOPSIS
+         #include <string.h>
+         void* memcpy(void *<[out]>, const void *<[in]>, size_t <[n]>);
+ 
+ TRAD_SYNOPSIS
+         void *memcpy(<[out]>, <[in]>, <[n]>)
+         void *<[out]>;
+         void *<[in]>;
+         size_t <[n]>;
+ 
+ DESCRIPTION
+         This function copies <[n]> bytes from the memory region
+         pointed to by <[in]> to the memory region pointed to by
+         <[out]>.
+ 
+         If the regions overlap, the behavior is undefined.
+ 
+ RETURNS
+         <<memcpy>> returns a pointer to the first byte of the <[out]>
+         region.
+ 
+ PORTABILITY
+ <<memcpy>> is ANSI C.
+ 
+ <<memcpy>> requires no supporting OS subroutines.
+ 
+ QUICKREF
+         memcpy ansi pure
+ 	*/
+ 
+ #include <_ansi.h>
+ #include <stddef.h>
+ #include <limits.h>
+ 
+ /* Integer type used for the wide loads and stores in the optimized
+    memcpy.  */
+ #ifdef __mips64
+ #define wordtype long long
+ #else
+ #define wordtype long
+ #endif
+ 
+ /* Nonzero if either X or Y is not aligned on a wordtype boundary.  */
+ #define UNALIGNED(X, Y) \
+   (((long)(X) & (sizeof (wordtype) - 1)) | ((long)(Y) & (sizeof (wordtype) - 1)))
+ 
+ /* How many bytes are copied each iteration of the 4X unrolled loop.  */
+ #define BIGBLOCKSIZE    (sizeof (wordtype) << 2)
+ 
+ /* How many bytes are copied each iteration of the word copy loop.  */
+ #define LITTLEBLOCKSIZE (sizeof (wordtype))
+ 
+ /* Threshold for punting to the byte copier.  */
+ #define TOO_SMALL(LEN)  ((LEN) < BIGBLOCKSIZE)
+ 
+ /* Copy LEN0 bytes from the region at SRC0 to the region at DST0 and return
+    DST0.  The regions must not overlap.  */
+ _PTR
+ _DEFUN (memcpy, (dst0, src0, len0),
+ 	_PTR dst0 _AND
+ 	_CONST _PTR src0 _AND
+ 	size_t len0)
+ {
+ #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) || defined(__mips16)
+   /* Simple byte-at-a-time version.  */
+   char *dst = (char *) dst0;
+   _CONST char *src = (_CONST char *) src0;
+ 
+   while (len0--)
+     {
+       *dst++ = *src++;
+     }
+ 
+   return dst0;
+ #else
+   char *dst = dst0;
+   _CONST char *src = src0;
+   wordtype *aligned_dst;
+   _CONST wordtype *aligned_src;
+   /* Track the remaining length as a size_t: an int would truncate or turn
+      negative for lengths that do not fit in an int.  */
+   size_t len = len0;
+   size_t iter;
+ 
+   /* Handle aligned moves here.  */
+   if (!UNALIGNED (src, dst))
+     {
+       iter = len / BIGBLOCKSIZE;
+       len = len % BIGBLOCKSIZE;
+       aligned_dst = (wordtype *)dst;
+       aligned_src = (_CONST wordtype *)src;
+ 
+       /* Copy 4X long or long long words at a time if possible.  All four
+ 	 loads are done before the stores.  */
+       while (iter > 0)
+ 	{
+ 	  wordtype tmp0 = aligned_src[0];
+ 	  wordtype tmp1 = aligned_src[1];
+ 	  wordtype tmp2 = aligned_src[2];
+ 	  wordtype tmp3 = aligned_src[3];
+ 
+ 	  aligned_dst[0] = tmp0;
+ 	  aligned_dst[1] = tmp1;
+ 	  aligned_dst[2] = tmp2;
+ 	  aligned_dst[3] = tmp3;
+ 	  aligned_src += 4;
+ 	  aligned_dst += 4;
+ 	  iter--;
+ 	}
+ 
+       /* Copy one long or long long word at a time if possible.  */
+       iter = len / LITTLEBLOCKSIZE;
+       len = len % LITTLEBLOCKSIZE;
+ 
+       while (iter > 0)
+ 	{
+ 	  *aligned_dst++ = *aligned_src++;
+ 	  iter--;
+ 	}
+ 
+       /* Pick up any residual with a byte copier.  */
+       dst = (char*)aligned_dst;
+       src = (_CONST char *)aligned_src;
+ 
+       while (len > 0)
+ 	{
+ 	  *dst++ = *src++;
+ 	  len--;
+ 	}
+ 
+       return dst0;
+     }
+ 
+   /* Handle unaligned moves here, using lwr/lwl and swr/swl where possible */
+   else
+     {
+ #ifndef NO_UNALIGNED_LOADSTORE
+       int tmp;
+       _CONST int *int_src = (_CONST int *)src;
+       int *int_dst = (int *)dst;
+ 
+       /* Move four bytes at a time with the ulw/usw assembler macros.  */
+       iter = len / 4;
+       len = len % 4;
+       while (iter > 0)
+ 	{
+ 	  __asm__ ("ulw %0,%1" : "=r" (tmp) : "m" (*int_src));
+ 	  iter--;
+ 	  int_src++;
+ 	  __asm__ ("usw %1,%0" : "=m" (*int_dst) : "r" (tmp));
+ 	  int_dst++;
+ 	}
+ 
+       /* Pick up any residual with a byte copier.  */
+       dst = (char*)int_dst;
+       src = (_CONST char *)int_src;
+ #endif
+ 
+       while (len > 0)
+ 	{
+ 	  *dst++ = *src++;
+ 	  len--;
+ 	}
+ 
+       return dst0;
+     }
+ #endif /* not PREFER_SIZE_OVER_SPEED */
+ }

-- 
Michael Meissner, Red Hat, Inc.  (GCC group)
PMB 198, 174 Littleton Road #3, Westford, Massachusetts 01886, USA
Work:	  meissner@redhat.com		phone: +1 978-486-9304
Non-work: meissner@the-meissners.org	fax:   +1 978-692-4482



More information about the Newlib mailing list