[PATCH] Remove atomic_forced_read

Carlos O'Donell carlos@redhat.com
Mon Aug 1 13:18:51 GMT 2022


On 7/20/22 12:50, Wilco Dijkstra via Libc-alpha wrote:
> Remove the odd atomic_forced_read, which is neither atomic nor forced.
> Some uses are completely redundant, so simply remove them.  In other cases
> the intended use is to enforce a memory ordering, so use an acquire load
> for those.  In yet other cases their purpose is unclear; for example,
> __nscd_cache_search appears to allow concurrent accesses to the cache while
> it is being garbage collected by another thread!  Use relaxed atomic loads
> here so the compiler cannot reload memory that is being changed (for
> example when rematerializing a spilled value) - however, given there are
> multiple accesses without any synchronization, it is unclear how this
> could ever work reliably...
> 
> Passes the regression tests on AArch64, OK for commit?

This fails pre-commit CI because the patch doesn't apply.

How are you generating these patches?
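
For anyone reading along: the macro being removed is just the empty asm
shown in the include/atomic.h hunk below.  It forces the value through a
register so the compiler cannot silently re-read it from memory, but it
is neither an atomic operation nor a barrier.  A rough standalone sketch
of the difference, not taken from the patch (the names and the bounds
check are made up, loosely modelled on the __nscd_cache_search pattern):

  #include <stddef.h>
  #include <stdint.h>

  extern unsigned char *data;   /* hypothetical shared, writable mapping */

  uint32_t
  lookup_plain (const uint32_t *slot, size_t datasize)
  {
    /* Plain load: the compiler may legally re-read *slot later instead
       of keeping OFF in a register or spilling it.  */
    uint32_t off = *slot;
    if (off >= datasize)
      return 0;
    /* A re-read of *slot here defeats the bounds check if another
       thread rewrites the slot concurrently.  */
    return data[off];
  }

  uint32_t
  lookup_once (const uint32_t *slot, size_t datasize)
  {
    /* Exactly one load and no ordering implied - this is what
       atomic_load_relaxed (and, in practice, the old asm hack) gives.  */
    uint32_t off = __atomic_load_n (slot, __ATOMIC_RELAXED);
    if (off >= datasize)
      return 0;
    return data[off];
  }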
 
> ---
> 
> diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
> index 8cb32321da33ea4f41e4a6cee038c2a6697ad817..02c63a7062b2be0f37a412160fdb2b3468cc70cf 100644
> --- a/elf/dl-lookup.c
> +++ b/elf/dl-lookup.c
> @@ -346,12 +346,12 @@ do_lookup_x (const char *undef_name, unsigned int new_hash,
>  	     const struct r_found_version *const version, int flags,
>  	     struct link_map *skip, int type_class, struct link_map *undef_map)
>  {
> -  size_t n = scope->r_nlist;
> -  /* Make sure we read the value before proceeding.  Otherwise we
> +  /* Make sure we read r_nlist before r_list, or otherwise we
>       might use r_list pointing to the initial scope and r_nlist being
>       the value after a resize.  That is the only path in dl-open.c not
> -     protected by GSCOPE.  A read barrier here might be to expensive.  */
> -  __asm volatile ("" : "+r" (n), "+m" (scope->r_list));
> +     protected by GSCOPE.  This works if all updates also use a store-
> +     release or release barrier.  */
> +  size_t n = atomic_load_acquire (&scope->r_nlist);
>    struct link_map **list = scope->r_list;
>  
>    do
> @@ -528,15 +528,13 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
>    if (is_nodelete (map, flags))
>      return 0;
>  
> -  struct link_map_reldeps *l_reldeps
> -    = atomic_forced_read (undef_map->l_reldeps);
> -
>    /* Make sure l_reldeps is read before l_initfini.  */
> -  atomic_read_barrier ();
> +  struct link_map_reldeps *l_reldeps
> +    = atomic_load_acquire (&undef_map->l_reldeps);
>  
>    /* Determine whether UNDEF_MAP already has a reference to MAP.  First
>       look in the normal dependencies.  */
> -  struct link_map **l_initfini = atomic_forced_read (undef_map->l_initfini);
> +  struct link_map **l_initfini = undef_map->l_initfini;
>    if (l_initfini != NULL)
>      {
>        for (i = 0; l_initfini[i] != NULL; ++i)
> @@ -570,7 +568,7 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
>  	 it can e.g. point to unallocated memory.  So avoid the optimizer
>  	 treating the above read from MAP->l_serial as ensurance it
>  	 can safely dereference it.  */
> -      map = atomic_forced_read (map);
> +      __asm ("" : "=r" (map) : "0" (map));
>  
>        /* From this point on it is unsafe to dereference MAP, until it
>  	 has been found in one of the lists.  */
> diff --git a/include/atomic.h b/include/atomic.h
> index 53bbf0423344ceda6cf98653ffa90e8d4f5d81aa..8eb56362ba18eb4836070930d5f2e769fb6a0a1e 100644
> --- a/include/atomic.h
> +++ b/include/atomic.h
> @@ -119,11 +119,6 @@
>  #endif
>  
>  
> -#ifndef atomic_forced_read
> -# define atomic_forced_read(x) \
> -  ({ __typeof (x) __x; __asm ("" : "=r" (__x) : "0" (x)); __x; })
> -#endif
> -
>  /* This is equal to 1 iff the architecture supports 64b atomic operations.  */
>  #ifndef __HAVE_64B_ATOMICS
>  #error Unable to determine if 64-bit atomics are present.
> diff --git a/malloc/malloc-debug.c b/malloc/malloc-debug.c
> index 43604ac2641e2b80eb0e4f20747af895ab2e6d55..4e56ff71f0fd1895c770f58667db93c0372a5aee 100644
> --- a/malloc/malloc-debug.c
> +++ b/malloc/malloc-debug.c
> @@ -168,7 +168,7 @@ static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
>  static void *
>  __debug_malloc (size_t bytes)
>  {
> -  void *(*hook) (size_t, const void *) = atomic_forced_read (__malloc_hook);
> +  void *(*hook) (size_t, const void *) = __malloc_hook;
>    if (__builtin_expect (hook != NULL, 0))
>      return (*hook)(bytes, RETURN_ADDRESS (0));
>  
> @@ -192,7 +192,7 @@ strong_alias (__debug_malloc, malloc)
>  static void
>  __debug_free (void *mem)
>  {
> -  void (*hook) (void *, const void *) = atomic_forced_read (__free_hook);
> +  void (*hook) (void *, const void *) = __free_hook;
>    if (__builtin_expect (hook != NULL, 0))
>      {
>        (*hook)(mem, RETURN_ADDRESS (0));
> @@ -216,8 +216,7 @@ strong_alias (__debug_free, free)
>  static void *
>  __debug_realloc (void *oldmem, size_t bytes)
>  {
> -  void *(*hook) (void *, size_t, const void *) =
> -    atomic_forced_read (__realloc_hook);
> +  void *(*hook) (void *, size_t, const void *) = __realloc_hook;
>    if (__builtin_expect (hook != NULL, 0))
>      return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
>  
> @@ -270,8 +269,7 @@ strong_alias (__debug_realloc, realloc)
>  static void *
>  _debug_mid_memalign (size_t alignment, size_t bytes, const void *address)
>  {
> -  void *(*hook) (size_t, size_t, const void *) =
> -    atomic_forced_read (__memalign_hook);
> +  void *(*hook) (size_t, size_t, const void *) = __memalign_hook;
>    if (__builtin_expect (hook != NULL, 0))
>      return (*hook)(alignment, bytes, address);
>  
> @@ -363,7 +361,7 @@ __debug_calloc (size_t nmemb, size_t size)
>        return NULL;
>      }
>  
> -  void *(*hook) (size_t, const void *) = atomic_forced_read (__malloc_hook);
> +  void *(*hook) (size_t, const void *) = __malloc_hook;
>    if (__builtin_expect (hook != NULL, 0))
>      {
>        void *mem = (*hook)(bytes, RETURN_ADDRESS (0));
> diff --git a/nptl/pthread_sigqueue.c b/nptl/pthread_sigqueue.c
> index 48dc3ca4ee5673b7b4b2543b823d6e9354ae9849..f4149fb1779eacea0ead107f7bfce32b22114f3b 100644
> --- a/nptl/pthread_sigqueue.c
> +++ b/nptl/pthread_sigqueue.c
> @@ -33,7 +33,7 @@ __pthread_sigqueue (pthread_t threadid, int signo, const union sigval value)
>    /* Force load of pd->tid into local variable or register.  Otherwise
>       if a thread exits between ESRCH test and tgkill, we might return
>       EINVAL, because pd->tid would be cleared by the kernel.  */
> -  pid_t tid = atomic_forced_read (pd->tid);
> +  pid_t tid = atomic_load_relaxed (&pd->tid);
>    if (__glibc_unlikely (tid <= 0))
>      /* Not a valid thread handle.  */
>      return ESRCH;
> diff --git a/nscd/nscd_helper.c b/nscd/nscd_helper.c
> index fc41bfdb6eebb880d6132ea5cf409ca657570f82..7a9a49955691e15079a94b78a12f9efed381ecb5 100644
> --- a/nscd/nscd_helper.c
> +++ b/nscd/nscd_helper.c
> @@ -454,7 +454,6 @@ __nscd_cache_search (request_type type, const char *key, size_t keylen,
>    size_t datasize = mapped->datasize;
>  
>    ref_t trail = mapped->head->array[hash];
> -  trail = atomic_forced_read (trail);
>    ref_t work = trail;
>    size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
>  				+ offsetof (struct datahead, data) / 2);
> @@ -465,32 +464,29 @@ __nscd_cache_search (request_type type, const char *key, size_t keylen,
>        struct hashentry *here = (struct hashentry *) (mapped->data + work);
>        ref_t here_key, here_packet;
>  
> -#if !_STRING_ARCH_unaligned
>        /* Although during garbage collection when moving struct hashentry
>  	 records around we first copy from old to new location and then
>  	 adjust pointer from previous hashentry to it, there is no barrier
> -	 between those memory writes.  It is very unlikely to hit it,
> -	 so check alignment only if a misaligned load can crash the
> -	 application.  */
> +	 between those memory writes!!! This is extremely risky on any
> +	 modern CPU which can reorder memory accesses very aggressively.
> +	 Check alignment, both as a partial consistency check and to avoid
> +	 crashes on targets which require atomic loads to be aligned.  */
>        if ((uintptr_t) here & (__alignof__ (*here) - 1))
>  	return NULL;
> -#endif
>  
>        if (type == here->type
>  	  && keylen == here->len
> -	  && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
> +	  && (here_key = atomic_load_relaxed (&here->key)) + keylen <= datasize
>  	  && memcmp (key, mapped->data + here_key, keylen) == 0
> -	  && ((here_packet = atomic_forced_read (here->packet))
> +	  && ((here_packet = atomic_load_relaxed (&here->packet))
>  	      + sizeof (struct datahead) <= datasize))
>  	{
>  	  /* We found the entry.  Increment the appropriate counter.  */
>  	  struct datahead *dh
>  	    = (struct datahead *) (mapped->data + here_packet);
>  
> -#if !_STRING_ARCH_unaligned
>  	  if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
>  	    return NULL;
> -#endif
>  
>  	  /* See whether we must ignore the entry or whether something
>  	     is wrong because garbage collection is in progress.  */
> @@ -501,7 +497,7 @@ __nscd_cache_search (request_type type, const char *key, size_t keylen,
>  	    return dh;
>  	}
>  
> -      work = atomic_forced_read (here->next);
> +      work = atomic_load_relaxed (&here->next);
>        /* Prevent endless loops.  This should never happen but perhaps
>  	 the database got corrupted, accidentally or deliberately.  */
>        if (work == trail || loop_cnt-- == 0)
> @@ -511,16 +507,14 @@ __nscd_cache_search (request_type type, const char *key, size_t keylen,
>  	  struct hashentry *trailelem;
>  	  trailelem = (struct hashentry *) (mapped->data + trail);
>  
> -#if !_STRING_ARCH_unaligned
>  	  /* We have to redo the checks.  Maybe the data changed.  */
>  	  if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
>  	    return NULL;
> -#endif
>  
>  	  if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
>  	    return NULL;
>  
> -	  trail = atomic_forced_read (trailelem->next);
> +	  trail = atomic_load_relaxed (&trailelem->next);
>  	}
>        tick = 1 - tick;
>      }
> 
> 
> 
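
One more thought on the dl-lookup.c hunk: the new comment only holds if
every writer publishes r_nlist with a release store (or a release
barrier) after the r_list update.  A minimal sketch of the writer side
this assumes - a hypothetical function, not part of the patch, relying
on glibc's internal struct r_scope_elem and the atomic_store_release
macro from include/atomic.h:

  /* Install a grown scope array and then publish the new element count,
     so that the acquire load of r_nlist in do_lookup_x also observes
     the matching r_list.  */
  static void
  publish_scope (struct r_scope_elem *scope, struct link_map **new_list,
                 unsigned int new_nlist)
  {
    /* Fill in the array first; the release store below orders this
       store before the new count becomes visible.  */
    scope->r_list = new_list;

    /* Publish the count last, pairing with
       atomic_load_acquire (&scope->r_nlist) on the reader side.  */
    atomic_store_release (&scope->r_nlist, new_nlist);
  }

Whether every update path in dl-open.c really provides that release
ordering today is probably worth double-checking before the old asm
goes away.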


-- 
Cheers,
Carlos.


