This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] memset.S and PowerPC


On Tue, 2002-06-18 at 12:34, Ulrich Drepper wrote:

> I have not the slightest interest in adding more hacks.  The code works
> fine for the platforms so far supported.  ...

Actually, the current code (memset.S) fails on any PowerPC system whose cache-line size is anything other than 32 bytes. Since all 64-bit PowerPC hardware implementations have 128-byte cache lines, this has been an issue for some time.

> ... If the final solution
> requires using the auxiliary vector values and therefore a memory load,
> so be it. But no going back to the generic code and no disabling the
> benefits for supported implementations.

OK, the attached patch adds support for capturing AT_DCACHEBSIZE from the aux vector during libc (and ld.so) initialization and storing it in the static "int __cache_line_size". __cache_line_size is used by sysdeps/powerpc/memset.S to determine the actual cache-line size. The original implementation (hardcoded for 32-byte cache lines) is left intact (logic and cache alignment) and is used when the cache-line size matches. Otherwise, cache-line-size-independent code is used. If the __cache_line_size value is 0 (for example, on an old version of the kernel that doesn't support AT_DCACHEBSIZE), a slower but safe sequence is used.

2002-07-08  Steven Munroe  <sjmunroe@us.ibm.com>

	* sysdeps/powerpc/elf/libc-start.c : Scan Aux Vector for 
	AT_DCACHEBSIZE and copy value to __cache_line_size.
	* sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c : Scan Aux Vector for 
	AT_DCACHEBSIZE and copy value to __cache_line_size.
	* sysdeps/powerpc/memset.S : Define __cache_line_size and use its
	value to select the correct stride for dcbz.


diff -rc2P -x manual glibc-2.2.5/sysdeps/powerpc/elf/libc-start.c glibc-225memset/sysdeps/powerpc/elf/libc-start.c
*** glibc-2.2.5/sysdeps/powerpc/elf/libc-start.c	Thu Jul  5 23:56:01 2001
--- glibc-225memset/sysdeps/powerpc/elf/libc-start.c	Tue Jul  9 08:40:18 2002
***************
*** 27,30 ****
--- 27,34 ----
  extern int _dl_starting_up;
  weak_extern (_dl_starting_up)
+ 
+ extern int __cache_line_size;
+ weak_extern (__cache_line_size)
+ 
  extern int __libc_multiple_libcs;
  extern void *__libc_stack_end;
***************
*** 38,41 ****
--- 42,63 ----
  };
  
+ 
+ static inline void
+ __aux_init_cache (ElfW(auxv_t) *av)
+ {
+   for (; av->a_type != AT_NULL; ++av)
+     switch (av->a_type)
+       {
+       case AT_DCACHEBSIZE:
+       	{
+ 			int *cls = & __cache_line_size;
+ 			if (cls != NULL)
+     			*cls = av->a_un.a_val;
+ 		}
+ 		break;
+       }
+ }
+ 
+ 
  int
  /* GKM FIXME: GCC: this should get __BP_ prefix by virtue of the
***************
*** 75,80 ****
        auxvec = ubp_ev;
        while (*(char *__unbounded *__unbounded) auxvec != NULL)
! 	++auxvec;
!       ++auxvec;
  #ifndef SHARED
        _dl_aux_init ((ElfW(auxv_t) *) auxvec);
--- 97,103 ----
        auxvec = ubp_ev;
        while (*(char *__unbounded *__unbounded) auxvec != NULL)
! 		++(char**)auxvec;
!       ++(char**)auxvec;
! 
  #ifndef SHARED
        _dl_aux_init ((ElfW(auxv_t) *) auxvec);
***************
*** 83,87 ****
      }
  
!   INIT_ARGV_and_ENVIRON;
  
    /* Store something that has some relationship to the end of the
--- 106,112 ----
      }
  
!   INIT_ARGV_and_ENVIRON;      
!     /* set up cache line size etc from aux vector. */
!     __aux_init_cache((ElfW(auxv_t) *) auxvec);
  
    /* Store something that has some relationship to the end of the
diff -rc2P -x manual glibc-2.2.5/sysdeps/powerpc/memset.S glibc-225memset/sysdeps/powerpc/memset.S
*** glibc-2.2.5/sysdeps/powerpc/memset.S	Thu Jul  5 23:56:01 2001
--- glibc-225memset/sysdeps/powerpc/memset.S	Thu Jun 27 14:26:13 2002
***************
*** 22,31 ****
  #include <bp-asm.h>
  
  /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
     Returns 's'.
  
!    The memset is done in three sizes: byte (8 bits), word (32 bits),
!    cache line (256 bits). There is a special case for setting cache lines
!    to 0, to take advantage of the dcbz instruction.  */
  
  EALIGN (BP_SYM (memset), 5, 1)
--- 22,44 ----
  #include <bp-asm.h>
  
+ /* Define a global variable that holds the cache line size.  The
+    assumption is that startup code will read the "aux vector" and
+    store the cache line size into this variable.  */
+    
+ 	.globl __cache_line_size
+ 	.section	".sdata","aw"
+ 	.align 2
+ 	.type	 __cache_line_size,@object
+ 	.size	 __cache_line_size,4
+ __cache_line_size:
+ 	.long 0
+ 	.section	".text"
  /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
     Returns 's'.
  
!    The memset is done in four sizes: byte (8 bits), word (32 bits),
!    32-byte blocks (256 bits) and cache line (128, 256, 1024 bits). 
!    There is a special case for setting cache lines to 0, to take 
!    advantage of the dcbz instruction.  */
  
  EALIGN (BP_SYM (memset), 5, 1)
***************
*** 51,54 ****
--- 64,71 ----
  #define rNEG32	r9	/* constant -32 for clearing with dcbz */
  
+ #define rGOT	r9	/* address of the Global Offset Table  */
+ #define rCLS	r8	/* cache line size obtained from static */
+ #define rCLM	r9	/* cache line mask to check for cache alignment */
+ 
  #if __BOUNDED_POINTERS__
  	cmplwi	cr1, rRTN, 0
***************
*** 106,110 ****
  	clrrwi.	rALIGN, rLEN, 5
  	mtcrf	0x01, rLEN	/* 40th instruction from .align */
! 	beq	cr1, L(zloopstart) /* special case for clearing memory using dcbz */
  	srwi	rTMP, rALIGN, 5
  	mtctr	rTMP
--- 123,137 ----
  	clrrwi.	rALIGN, rLEN, 5
  	mtcrf	0x01, rLEN	/* 40th instruction from .align */
! 	
! /*  Check if we can use the special case for clearing memory using dcbz.
!     This requires that we know the correct cache line size for this    
!     processor. Getting the __cache_line_size requires establishing GOT
!     addressability . So branch out of line to set this up. */
! 	beq	cr1, L(checklinesize) 
! 	
! /*  Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary. 
!     Can't assume that rCHR is zero or that the cache line size is either
!     32-bytes or even known */
! L(nondcbz):
  	srwi	rTMP, rALIGN, 5
  	mtctr	rTMP
***************
*** 115,119 ****
  	bdz	L(cloopdone)	/* 48th instruction from .align */
  
! L(c3):	dcbz	rNEG64, rMEMP
  	stw	rCHR, -4(rMEMP)
  	stw	rCHR, -8(rMEMP)
--- 142,148 ----
  	bdz	L(cloopdone)	/* 48th instruction from .align */
  
! /*  Can't use dcbz here since we do not know the cache line size.  Use data
!     cache block touch, which is safe.  */
! L(c3):	dcbt	rNEG64, rMEMP
  	stw	rCHR, -4(rMEMP)
  	stw	rCHR, -8(rMEMP)
***************
*** 143,147 ****
  	.align 5
  	nop
! /* Clear lines of memory in 128-byte chunks.  */
  L(zloopstart):
  	clrlwi	rLEN, rLEN, 27
--- 172,181 ----
  	.align 5
  	nop
! /* Clear lines of memory in 128-byte chunks.  
!    This code is optimized for processors with 32-byte 
!    cache lines. It is further optimized for the 601
!    processor so preserving alignment in the i-cache is
!    important.
! */
  L(zloopstart):
  	clrlwi	rLEN, rLEN, 27
***************
*** 227,229 ****
--- 261,326 ----
  	stw	rCHR, -8(rMEMP)
  	blr
+ 	
+ L(checklinesize):
+ 	mflr	rTMP
+ /*	If the remaining length is less than 32 bytes, don't bother getting 
+ 	the cache line size */
+ 	beq	L(medium)
+ 	
+ /*	Establishes GOT addressability so we can load __cache_line_size 
+     from static. This value was set from the aux vector during startup. */
+ 	bl		_GLOBAL_OFFSET_TABLE_@local-4
+ 	mflr	rGOT
+ 	lwz		rGOT,__cache_line_size@got(rGOT)
+ 	lwz		rCLS,0(rGOT)
+ 	mtlr	rTMP
+ 	
+ /*	If the cache line size was not set, just go to L(nondcbz), which is 
+ 	safe for any cache line size. */	
+ 	cmplwi	cr1,rCLS,0
+ 	beq		cr1,L(nondcbz)
+ 	
+ /*	If the cache line size is 32 bytes, just go to L(zloopstart), 
+ 	which is coded specifically for 32-byte lines (and 601). */	
+ 	cmplwi	cr1,rCLS,32
+ 	beq		cr1,L(zloopstart)
+ 	
+ /* 	Now we know the cache line size, and it is not 32-bytes, but
+ 	we may not yet be aligned to the cache line. May have a partial 
+ 	line to fill, so touch it 1st */	
+ 	dcbt	0,rMEMP	
+ 	addi	rCLM,rCLS,-1
+ L(getCacheAligned):
+ 	cmplwi	cr1,rLEN,32
+ 	and.	rTMP,rCLM,rMEMP
+ 	blt		cr1,L(handletail32)
+ 	beq		L(cacheAligned)
+ 	addi	rMEMP,rMEMP,32
+ 	addi	rLEN,rLEN,-32
+ 	stw		rCHR,-32(rMEMP)
+ 	stw		rCHR,-28(rMEMP)
+ 	stw		rCHR,-24(rMEMP)
+ 	stw		rCHR,-20(rMEMP)
+ 	stw		rCHR,-16(rMEMP)
+ 	stw		rCHR,-12(rMEMP)
+ 	stw		rCHR,-8(rMEMP)
+ 	stw		rCHR,-4(rMEMP)
+ 	b		L(getCacheAligned)
+ 	
+ /*  Now we are aligned to the cache line and can use dcbz. */	
+ L(cacheAligned):
+ 	cmplw	cr1,rLEN,rCLS
+ 	blt		cr1,L(handletail32)
+ 	dcbz	0,rMEMP
+ 	subf	rLEN,rCLS,rLEN
+ 	add		rMEMP,rMEMP,rCLS
+ 	b		L(cacheAligned)
+ 
+ /*	We are here because the cache line size was set and was not 32-bytes
+ 	and the remainder (rLEN) is less than the actual cache line size.
+     So set up the preconditions for L(nondcbz) and go there.  */			
+ L(handletail32):
+ 	clrrwi.	rALIGN, rLEN, 5
+ 	b		L(nondcbz)
+ 		
  END (BP_SYM (memset))
diff -rc2P -x manual glibc-2.2.5/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c glibc-225memset/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
*** glibc-2.2.5/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c	Thu Jul  5 23:56:19 2001
--- glibc-225memset/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c	Tue Jul  9 08:37:30 2002
***************
*** 21,24 ****
--- 21,47 ----
  #include "config.h"
  #include "kernel-features.h"
+ #include <ldsodefs.h>
+ 
+ extern int __cache_line_size;
+ weak_extern (__cache_line_size)
+ 
+ #define DL_PLATFORM_INIT __aux_init_cache(_dl_auxv)
+ 
+ /* set __cache_line_size with d-cache line size from the aux vector */
+ static inline void
+ __aux_init_cache (ElfW(auxv_t) *av)
+ {
+   for (; av->a_type != AT_NULL; ++av)
+     switch (av->a_type)
+       {
+       case AT_DCACHEBSIZE:
+       	{
+ 			int *cls = & __cache_line_size;
+ 			if (cls != NULL)
+     			*cls = av->a_un.a_val;
+ 		}
+ 		break;
+       }
+ }
  
  #ifndef __ASSUME_STD_AUXV


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]