Re: [Patch, MIPS] Modify memset.S for mips32r6/mips64r6
- From: Steve Ellcey <sellcey@imgtec.com>
- To: Joseph Myers <joseph@codesourcery.com>
- Cc: <libc-alpha@sourceware.org>
- Date: Mon, 5 Jan 2015 13:54:46 -0800
- Subject: Re: [Patch, MIPS] Modify memset.S for mips32r6/mips64r6
- References: <2923c970-026c-4e00-be7a-0650e82421b5@BAMAIL02.ba.imgtec.org> <alpine.DEB.2.10.1412221759370.5278@digraph.polyomino.org.uk> <1419276035.27606.60.camel@ubuntu-sellcey> <alpine.DEB.2.10.1412222000270.5278@digraph.polyomino.org.uk>
- Reply-to: <sellcey@imgtec.com>
On Mon, 2014-12-22 at 20:02 +0000, Joseph Myers wrote:
> On Mon, 22 Dec 2014, Steve Ellcey wrote:
>
> > I considered changing the alignment code to only align on a 4-byte
> > boundary for O32 mode, or ifdef'ing this test, but it seemed cleaner to
> > increase the minimum size of buffers that get handled via a simple byte
> > copy loop for both r6 and earlier CPUs.
>
> In that case, submit the change as a preparatory patch, with its own
> justification for being OK for existing CPUs (that it doesn't affect
> performance, or whatever), so that the r6 patch can avoid changing pre-r6
> code.
I removed the non-r6 changes and made a few tweaks based on your
memcpy.S feedback. This is the last glibc patch needed for r6 support.
Steve Ellcey
sellcey@imgtec.com
2015-01-05 Steve Ellcey <sellcey@imgtec.com>
* sysdeps/mips/memset.S (memset): Modify for mips32r6/mips64r6
to avoid using swl/swr (sdl/sdr) to align destination.
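The key change: pre-R6 code aligns an unaligned destination with a single
unaligned partial store (C_STHI, i.e. swl/swr or sdl/sdr), but R6 removes
those instructions, so the new path branches through a table of plain byte
stores instead. A minimal C sketch of that scheme (illustration only;
align_dst and its parameters are made up here, and it assumes len >= nsize,
which the real code guarantees by sending small buffers to a byte loop):

  /* nsize mirrors the memset.S NSIZE macro: 4 for 32-bit stores,
     8 for 64-bit (USE_DOUBLE) stores.  */
  #include <stddef.h>
  #include <stdint.h>

  static unsigned char *
  align_dst (unsigned char *dst, size_t *len, int c, size_t nsize)
  {
    size_t mis = (uintptr_t) dst & (nsize - 1);
    if (mis != 0)
      {
        size_t head = nsize - mis;   /* bytes until the next boundary */
        /* memset.S reaches these stores through a table of compact
           branches (bc) indexed with PTR_LSA instead of a loop.  */
        for (size_t i = 0; i < head; i++)
          dst[i] = (unsigned char) c;
        dst += head;
        *len -= head;
      }
    return dst;                      /* aligned; the word loop can start */
  }

The branch table costs one computed jump plus exactly the byte stores
needed, avoiding a per-byte test-and-branch loop on the hot path.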
diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S
index abd73c2..eddeb57 100644
--- a/sysdeps/mips/memset.S
+++ b/sysdeps/mips/memset.S
@@ -54,6 +54,14 @@
# endif
#endif
+#if __mips_isa_rev > 5
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+#  undef PREFETCH_STORE_HINT
+#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
+# endif
+# define R6_CODE
+#endif
+
/* Some asm.h files do not have the L macro definition. */
#ifndef L
# if _MIPS_SIM == _ABIO32
@@ -72,6 +80,15 @@
# endif
#endif
+/* New R6 instructions that may not be in asm.h. */
+#ifndef PTR_LSA
+# if _MIPS_SIM == _ABI64
+#  define PTR_LSA dlsa
+# else
+#  define PTR_LSA lsa
+# endif
+#endif
+
/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
or PREFETCH_STORE_STREAMED offers a large performance advantage
but PREPAREFORSTORE has some special restrictions to consider.
@@ -231,11 +248,48 @@ LEAF(MEMSET_NAME)
/* If the destination address is not aligned do a partial store to get it
aligned. If it is already aligned just jump to L(aligned). */
L(set0):
+#ifndef R6_CODE
andi t2,a3,(NSIZE-1) /* word-unaligned address? */
beq t2,zero,L(aligned) /* t2 is the unalignment count */
PTR_SUBU a2,a2,t2
C_STHI a1,0(a0)
PTR_ADDU a0,a0,t2
+#else /* R6_CODE */
+ andi t2,a0,(NSIZE-1)
+ lapc t9,L(atable)
+ PTR_LSA t9,t2,t9,2
+ jrc t9
+L(atable):
+ bc L(aligned)
+# ifdef USE_DOUBLE
+ bc L(lb7)
+ bc L(lb6)
+ bc L(lb5)
+ bc L(lb4)
+# endif
+ bc L(lb3)
+ bc L(lb2)
+ bc L(lb1)
+L(lb7):
+ sb a1,6(a0)
+L(lb6):
+ sb a1,5(a0)
+L(lb5):
+ sb a1,4(a0)
+L(lb4):
+ sb a1,3(a0)
+L(lb3):
+ sb a1,2(a0)
+L(lb2):
+ sb a1,1(a0)
+L(lb1):
+ sb a1,0(a0)
+
+ li t9,NSIZE
+ subu t2,t9,t2
+ PTR_SUBU a2,a2,t2
+ PTR_ADDU a0,a0,t2
+#endif /* R6_CODE */
L(aligned):
/* If USE_DOUBLE is not set we may still want to align the data on a 16
@@ -286,8 +340,12 @@ L(loop16w):
bgtz v1,L(skip_pref)
nop
#endif
+#ifdef R6_CODE
+ PREFETCH_FOR_STORE (2, a0)
+#else
PREFETCH_FOR_STORE (4, a0)
PREFETCH_FOR_STORE (5, a0)
+#endif
L(skip_pref):
C_ST a1,UNIT(0)(a0)
C_ST a1,UNIT(1)(a0)
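A note on the prefetch hunk above: the PrepareForStore hint allocates and
zeroes a whole cache line without fetching it from memory, which is why
pre-R6 code can prefetch aggressively (chunks 4 and 5 ahead) but must never
touch a line outside the region being written; R6 drops that hint, so the
R6 path issues one streaming-store prefetch (chunk 2 ahead) instead. A
rough C-level analogue using GCC's __builtin_prefetch (illustration only;
set_lines is made up here, and the real code uses the PREFETCH_FOR_STORE
asm macro):

  #include <stddef.h>

  static void
  set_lines (unsigned char *dst, size_t nlines, size_t line, int c)
  {
    for (size_t i = 0; i < nlines; i++)
      {
        /* Prefetch for write (rw = 1) with streaming, non-temporal
           locality (0), two lines ahead of the store loop.  */
        __builtin_prefetch (dst + 2 * line, 1, 0);
        for (size_t j = 0; j < line; j++)
          dst[j] = (unsigned char) c;
        dst += line;
      }
  }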