This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 1/3] PowerPC: Add the low level infrastructure for pthreads


Hi,

On 10-12-2014 17:13, Will Schmidt wrote:
> On Mon, 2014-11-10 at 19:20 -0200, Adhemerval Zanella wrote:
>> On 08-11-2014 01:05, Segher Boessenkool wrote:
>>> On Fri, Nov 07, 2014 at 05:30:38PM -0200, Adhemerval Zanella wrote:
> <snip>
>
>> Thanks, I have updated the patch.  Tested on a power8 with GCC 4.9 (that have
>> support for HTM builtins) and GCC 4.8 (which does not).
> Code-wise, only point of real concern is in elision-lock.c, where i see
> what looks like a dangling zero.   A few cosmetic bits, and
> clarification questions throughout.
> review/commentary/questions sprinkled below.
>
> (review of part 2,3 are in the queue).

Thanks for the review Will.  Below some comments about your questioning.
I will send an updated version shortly.

>> diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h b/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
>> index 4e9c518..396a3d9 100644
>> --- a/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
>> +++ b/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
>> @@ -90,14 +90,23 @@ typedef union
>>         binary compatibility.  */
>>      int __kind;
>>  #if __WORDSIZE == 64
>> -    int __spins;
>> +    short __spins;
>> +    short __elision;
>>      __pthread_list_t __list;
>>  # define __PTHREAD_MUTEX_HAVE_PREV	1
>> +# define __PTHREAD_SPINS             0, 0
>>  #else
>>      unsigned int __nusers;
>>      __extension__ union
>>      {
>> -      int __spins;
>> +      struct
>> +      {
>> +	short __espins;
>> +	short __elision;
>> +# define __spins __elision_data.__espins
>> +# define __elision __elision_data.__elision
>> +# define __PTHREAD_SPINS         { 0, 0 }
>> +      } __elision_data;
>>        __pthread_slist_t __list;
>>      };
>>  #endif
>> @@ -106,9 +115,6 @@ typedef union
>>    long int __align;
>>  } pthread_mutex_t;
>>
>> -/* Mutex __spins initializer used by PTHREAD_MUTEX_INITIALIZER.  */
>> -#define __PTHREAD_SPINS 0
>> -
> This is part of " [__PTHREAD_SPINS]: Adjust to init lock elision field.
> " ?   From the context, and in comparison to s390 related code, I
> conclude that LOCK_ELISION is always enabled for powerpc, so there is no
> need for the non-pair PTHREAD_SPINS value.   correct?

The idea here is just to redefine the pthread_mutex_t fields instead changing its
layout or size.  The __spins will be used only for adaptive mutexes 
(PTHREAD_MUTEX_ADAPTIVE_NP) and it will be set to a maximum value of MAX_ADAPTIVE_COUNT
(100), so a short is more than enough.

>>  typedef union
>>  {
>>    char __size[__SIZEOF_PTHREAD_MUTEXATTR_T];
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-conf.c b/sysdeps/unix/sysv/linux/powerpc/elision-conf.c
>> new file mode 100644
>> index 0000000..60a7900
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-conf.c
>> @@ -0,0 +1,81 @@
>> +/* elision-conf.c: Lock elision tunable parameters.
>> +   Copyright (C) 2014 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#include "config.h"
>> +#include <pthreadP.h>
>> +#include <elision-conf.h>
>> +#include <unistd.h>
>> +#include <dl-procinfo.h>
>> +
>> +/* Reasonable initial tuning values, may be revised in the future.
>> +   This is a conservative initial value.  */
>> +
>> +struct elision_config __elision_aconf =
>> +  {
>> +    /* How often to not attempt to use elision if a transaction aborted
>> +       because the lock is already acquired.  Expressed in number of lock
>> +       acquisition attempts.  */
>> +    .skip_lock_busy = 3,
>> +    /* How often to not attempt to use elision if a transaction aborted due
>> +       to reasons other than other threads' memory accesses.  Expressed in
>> +       number of lock acquisition attempts.  */
>> +    .skip_lock_internal_abort = 3,
>> +    /* How often to not attempt to use elision if a lock used up all retries
>> +       without success.  Expressed in number of lock acquisition attempts.  */
>> +    .skip_lock_out_of_tbegin_retries = 3,
>> +    /* How often we retry using elision if there is chance for the transaction
>> +       to finish execution (e.g., it wasn't aborted due to the lock being
>> +       already acquired.  */
>> +    .try_tbegin = 3,
>> +    /* Same as SKIP_LOCK_INTERNAL_ABORT but for trylock.  */
>> +    .skip_trylock_internal_abort = 3,
> "How often to not attempt to use elision if a ..."
> I'm struggling a bit with the 'how often to not' logic in the comments
> here.  Perhaps it's just me. :-)    (Noting that it matches the s390/x86
> text).
> Consider something like  "How many times to attempt to use elision after
> a transaction has aborted..."
> Actually, after looking over the aconf.* references in the code below,
> this may need to be "How many times to use a non-transactional lock
> after a transactional failure has occurred because ...". 

I used s390 as base, so I used the same wording.  I will update with your
suggestion

>
>
>> +  };
>> +
>> +/* Force elision for all new locks.  This is used to decide whether existing
>> +   DEFAULT locks should be automatically upgraded to elision in
> Is there an actual 'upgrade' step?  Perhaps "... should automatically
> use elision ... "

I will change it.

>
>> +   pthread_mutex_lock().  Disabled for suid programs.  Only used when elision
>> +   is available.  */
>> +
>> +int __pthread_force_elision attribute_hidden;
>> +
>> +/* Initialize elison.  */
> elision.
> (also this way in x86/s390 copies)

I will change it.

>
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-conf.h b/sysdeps/unix/sysv/linux/powerpc/elision-conf.h
>> new file mode 100644
>> index 0000000..333226a
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-conf.h
>> @@ -0,0 +1,43 @@
>> +/* elision-conf.h: Lock elision tunable parameters.
>> +   Copyright (C) 2014 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#ifndef _ELISION_CONF_H
>> +#define _ELISION_CONF_H 1
>> +
>> +#include <pthread.h>
>> +#include <time.h>
>> +
>> +/* Should make sure there is no false sharing on this.  */
> False sharing on the structure?  

My understanding is although this structure in handled in read-only case,
it may be the case where it is sharing the address space with some data
that is not read only (since the structure is only 20 bytes).
Now you mention I think it should be aligned to common currently powerpc64
cacheline size (128 bytes).  

>
> +
> +#include <stdio.h>
> +#include <pthread.h>
> +#include <pthreadP.h>
> +#include <lowlevellock.h>
> +#include <elision-conf.h>
> +#include "htm.h"
> +
> +/* PowerISA 2.0.7 Section B.5.5 defines isync to be isuficient to memory
> //insufficient
Fixed.
>> +   barrier in acquire mechanism, a strong sync need to used.  */
> //needs  or //will need
>
> Is this comment unique to the transactional support?   Assuming so, I'd
> go a step further and clarify that here. 
> /* The arch_compare_and_exchange... requires a strong sync ...  for
> transactional ... */
>
> (for future) If this could be incorporated into atomic.h with the other
> implementations of arch_compare... 

Bad wording here, although the ISA pointing clarify things: it is required only
when used along HTM operations.  I will change to:

/* PowerISA 2.0.7 Section B.5.5 defines isync to be insufficient to memory
   barrier in acquire mechanism for HTM operations, a stronger 'sync' is
   required.  */

>
>> +#undef __arch_compare_and_exchange_val_32_acq
>> +#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval)           \
>> +  ({                                                                          \
>> +      __typeof (*(mem)) __tmp;                                                \
>> +      __typeof (mem)  __memp = (mem);                                         \
>> +      __asm __volatile (                                                      \
>> +                        "1:     lwarx   %0,0,%1" MUTEX_HINT_ACQ "\n"          \
>> +                        "       cmpw    %0,%2\n"                              \
>> +                        "       bne     2f\n"                                 \
>> +                        "       stwcx.  %3,0,%1\n"                            \
>> +                        "       bne-    1b\n"                                 \
>> +                        "2:     sync"                                         \
>> +                        : "=&r" (__tmp)                                       \
>> +                        : "b" (__memp), "r" (oldval), "r" (newval)            \
>> +                        : "cr0", "memory");                                   \
>> +      __tmp;                                                                  \
>> +  })
>> +
>> +#if !defined(LLL_LOCK) && !defined(EXTRAARG)
>> +/* Make sure the configuration code is always linked in for static
>> +   libraries.  */
>> +#include "elision-conf.c"
>> +#endif
>> +
>> +#ifndef EXTRAARG
>> +# define EXTRAARG
>> +#endif
>> +#ifndef LLL_LOCK
>> +# define LLL_LOCK(a,b) lll_lock(a,b), 0
> Dangling zero?  This seems to match x86/s390, but looks weird.

It is because the 'LLL_LOCK' will be used in return at '__lll_lock_elision'
and 'lll_lock' at 'sysdeps/nptl/lowlevellock.h' (which is what powerpc
uses now) does not return anything.


>
> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
> new file mode 100644
> index 0000000..60aaf90
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
> @@ -0,0 +1,68 @@
> +/* elision-trylock.c: Lock eliding trylock for pthreads.
> +   Copyright (C) 2014 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <pthread.h>
> +#include <pthreadP.h>
> +#include <lowlevellock.h>
> +#include <elision-conf.h>
> +#include "htm.h"
> +
> +#define aconf __elision_aconf
> +
> +/* Try to elide a futex trylock.  FUTEX is the futex variable.  ADAPT_COUNT is
> +   the adaptation counter in the mutex.  */
> +
> +int
> +__lll_trylock_elision (int *futex, short *adapt_count)
> +{
> +  /* Implement POSIX semantics by forbiding nesting elided trylocks.  */
> +  __builtin_tabort (_ABORT_NESTED_TRYLOCK);
> +
> +  /* Only try a transaction if it's worth it.  */
> +  if (*adapt_count > 0)
> +    {
> +      (*adapt_count)--;
> +      goto use_lock;
> +    }
> +
> +  if (__builtin_tbegin (0))
> +    {
> +      if (*futex == 0)
> +	return 0;
> +
> +      /* Lock was busy.  Fall back to normal locking.  */
> +      __builtin_tabort (_ABORT_LOCK_BUSY);
> +    }
> +  else
> +    {
> +      if (_TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ()))
> +	{
> +	  /* A persistent failure indicates that a retry will probably
> +	     resulting in another failure.  Use normal locking now and
> result
Fixed.
>> +	     for the next couple of calls.  */
>> +	  if (aconf.skip_trylock_internal_abort > 0)
>> +	    *adapt_count = aconf.skip_trylock_internal_abort;
>> +	}
>> +
>> +	if (aconf.skip_lock_busy > 0)
>> +	  *adapt_count = aconf.skip_lock_busy;
>> +    }
>> +
>> +use_lock:
>> +  return lll_trylock (*futex);
>> +}
> ok.
>
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
>> new file mode 100644
>> index 0000000..59d46bb
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
>> @@ -0,0 +1,32 @@
>> +/* elision-unlock.c: Commit an elided pthread lock.
>> +   Copyright (C) 2014 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#include "pthreadP.h"
>> +#include "lowlevellock.h"
>> +#include "htm.h"
>> +
>> +int
>> +__lll_unlock_elision(int *lock, int pshared)
>> +{
>> +  /* When the lock was free we're in a transaction.  */
>> +  if (*lock == 0)
>> +    __builtin_tend (0);
>> +  else
>> +    lll_unlock ((*lock), pshared);
>> +  return 0;
>> +}
> Ok.
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/force-elision.h b/sysdeps/unix/sysv/linux/powerpc/force-elision.h
>> new file mode 100644
>> index 0000000..38c79bf
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/powerpc/force-elision.h
>> @@ -0,0 +1,33 @@
>> +/* force-elision.h: Automatic enabling of elision for mutexes
>> +   Copyright (C) 2014 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#ifdef ENABLE_LOCK_ELISION
>> +/* Check for elision on this lock without upgrading.  */
>> +#define DO_ELISION(m)							\
>> +  (__pthread_force_elision						\
>> +   && (m->__data.__kind & PTHREAD_MUTEX_NO_ELISION_NP) == 0)		\
> Is the 'with upgrading' counterpart the 'forcing' as seen in the #define
> below? 

In fact I recently removed this define along with this comment to adjust to recent
fixes on pthread_mutex_unlock (b0a3c1640ab2fb7d16d9b9a8d9c0e524e9cb0001).

>
> +#ifdef __ASSEMBLER__
> +
> +/* tbegin.  */
> +.macro TBEGIN
> +	.byte 0x7c,0x00,0x05,0x1d
> +.endm
> +
> +/* tend. 0  */
> +.macro TEND
> +	.byte 0x7c,0x00,0x05,0x5d
> +.endm
> Can these macros use .long as seen below?

They can, I will fix it.

>
>> +
>> +/* tabort. code  */
>> +.macro TABORT code
>> +        .byte 0x7c
>> +        .byte \code
>> +	.byte 0x07
>> +	.byte 0x1d
> indentation 
Fixed.
>
>> +.endm
>> +
>> +/* mfspr %dst,130  */
> That comment doesn't do much. But this is maybe a good spot to define
> TEXASR...   (here or wherever else it fits..)
>
> "TEXASR - Transaction EXception And Summary Register".  
> "TEXASRU - '' '' Upper half "
I will add this comment.
>
>> +.macro TEXASR dst
>> +	mfspr \dst,130
>> +.endm
>> +
>> +#else
>> +
>> +#include <endian.h>
>> +
>> +/* Official HTM intrinsics interface matching gcc, but works
>> +   on older gcc compatible compilers and binutils.
> GCC
Fixed.
>> +   We should somehow detect if the compiler supports it, because
>> +   it may be able to generate slightly better code.  */
>> +
>> +#define TBEGIN ".long 0x7c00051d"
>> +#define TEND   ".long 0x7c00055d"
>> +#if __BYTE_ORDER == __LITTLE_ENDIAN
>> +# define TABORT ".byte 0x1d,0x07,%1,0x1d"
>> +#else
>> +# define TABORT ".byte 0x7c,%1,0x07,0x1d"
>> +#endif
>> +
>> +#define __force_inline        inline __attribute__((__always_inline__))
>> +
>> +#ifndef __HTM__
>> +
>> +#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
>> +  (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))
>> +#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \
>> +  _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)
>> +
>> +static __force_inline
>> +unsigned int _tbegin (void)
>> +{
>> +  unsigned int ret;
>> +  asm volatile (
>> +    TBEGIN "\t\n"
>> +    "mfcr   %0\t\n"
>> +    "rlwinm %0,%0,3,1\t\n"
>> +    "xori %0,%0,1\t\n"
>> +    : "=r" (ret) :: "cr0", "memory");
>> +  return ret;
>> +}
>> +
>> +static __force_inline
>> +unsigned int _tend (void)
>> +{
>> +  unsigned int ret;
>> +  asm volatile (
>> +    TEND "\t\n"
>> +    "mfcr   %0\t\n"
>> +    "rlwinm %0,%0,3,1\t\n"
>> +    "xori %0,%0,1\t\n"
>> +    : "=r" (ret) :: "cr0", "memory");
>> +  return ret;
>> +}
>> +
>> +static __force_inline
>> +unsigned int _tabort (unsigned int code)
>> +{
>> +  unsigned int ret;
>> +  asm volatile (
>> +    TABORT "\t\n"
>> +    "mfcr   %0\t\n"
>> +    "rlwinm %0,%0,3,1\t\n"
>> +    "xori %0,%0,1\t\n"
>> +    : "=r" (ret) : "r" (code) : "cr0", "memory");
>> +  return ret;
>> +}
>> +
>> +static __force_inline
>> +unsigned long _texasru (void)
>> +{
>> +  unsigned long ret;
>> +  asm volatile (
>> +    "mfspr %0,131\t\n"
>> +    : "=r" (ret));
>> +  return ret;
>> +}
>> +
>> +#define __builtin_tbegin(tdb)       _tbegin ()
>> +#define __builtin_tend(nested)      _tend ()
>> +#define __builtin_tabort(abortcode) _tabort (abortcode)
>> +#define __builtin_get_texasru()     _texasru ()
>> +
>> +#else
>> +# include <htmintrin.h>
>> +#endif /* __HTM__  */
>> +
>> +#endif /* __ASSEMBLER__ */
>> +
>> +/* Definitions used for TEXASR Failure code (bits 0:6), they need to be even
>> +   because tabort. always sets the first bit.  */
>> +#define _ABORT_LOCK_BUSY       0x3f   /* Lock already used.  */
>> +#define _ABORT_NESTED_TRYLOCK  0x3e   /* Write operation in trylock.  */
>> +#define _ABORT_SYSCALL         0x3d   /* Syscall issued.  */
>> +
>> +#endif
> Ok.   (I've not closely reviewed the inline asm above, but expect those
> are correct). 
I had to changed it to C macros due they can be used on non static inline
function and GCC complains with a warning.  But yes, I tested both scenarios:
GCC with HTM support and GCC without.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]