Re: [PATCH] [BZ 18034] [AArch64] Lazy TLSDESC relocation data race fix

On 22/04/15 17:08, Torvald Riegel wrote:
> On Wed, 2015-04-22 at 13:27 +0100, Szabolcs Nagy wrote:
>> Lazy TLSDESC initialization needs to be synchronized with concurrent TLS
>> accesses.
> 
> Please get familiar with https://sourceware.org/glibc/wiki/Concurrency
> Fixes to existing code should use the C11 memory model for concurrent
> code, especially if the fix is about a concurrency issue.
> 
> I agree with Adhemerval that a release MO store seems to be sufficient
> in this case.
> 

Updated the patch.

I used a fence instead of the suggested atomic store because I
think this part of the concurrency wiki becomes problematic if glibc
ever tries to move to C11:

"We (currently) do not use special types for the variables accessed
by atomic operations, but the underlying non-atomic types with
suitable alignment on each architecture."

The release fence generates the same code (dmb ish) as the previous
__sync_synchronize.  A release fence is slightly stronger than what
AArch64 actually needs here: only store-store ordering is required,
but a release fence also orders earlier loads against later stores.
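
To make that concrete, here is the writer side of the fix as a
standalone C sketch (the struct and function names are made up for
illustration; the patch itself spells the fence
atomic_thread_fence_release via glibc's internal atomic.h, and the
C11 equivalent is used below):

#include <stdatomic.h>

struct desc
{
  void *(*entry) (struct desc *);  /* Resolver or final entry point.  */
  void *arg;                       /* Argument for entry.  */
};

void
fixup (struct desc *td, void *(*fn) (struct desc *), void *arg)
{
  /* A concurrent TLS access may load td->entry at any time and then
     load td->arg, so arg must be visible before the new entry is.  */
  td->arg = arg;
  /* Compiles to dmb ish on AArch64; only the store-store ordering
     between the two writes is relied on.  */
  atomic_thread_fence (memory_order_release);
  td->entry = fn;
}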

I haven't changed other parts of the patch.

ChangeLog:

2015-04-27  Szabolcs Nagy  <szabolcs.nagy@arm.com>

	[BZ #18034]
	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Declare.
	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Define.
	(_dl_tlsdesc_undefweak): Guarantee load-load ordering.
	(_dl_tlsdesc_dynamic): Likewise.
	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Add
	barrier between stores.
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index be9b9b3..ff74b52 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -79,6 +79,25 @@ _dl_tlsdesc_return:
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 
+	/* Same as _dl_tlsdesc_return but with synchronization for
+	   lazy relocation.
+	   Prototype:
+	   _dl_tlsdesc_return_lazy (tlsdesc *) ;
+	 */
+	.hidden _dl_tlsdesc_return_lazy
+	.global	_dl_tlsdesc_return_lazy
+	.type	_dl_tlsdesc_return_lazy,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return_lazy:
+	/* The ldar guarantees ordering between the load from [x0] at the
+	   call site and the load from [x0,#8] here for lazy relocation. */
+	ldar	xzr, [x0]
+	ldr	x0, [x0, #8]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
+
 	/* Handler for undefined weak TLS symbols.
 	   Prototype:
 	   _dl_tlsdesc_undefweak (tlsdesc *);
@@ -96,6 +115,9 @@ _dl_tlsdesc_return:
 _dl_tlsdesc_undefweak:
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset(16)
+	/* The ldar guarantees ordering between the load from [x0] at the
+	   call site and the load from [x0,#8] here for lazy relocation. */
+	ldar	xzr, [x0]
 	ldr	x0, [x0, #8]
 	mrs	x1, tpidr_el0
 	sub	x0, x0, x1
@@ -152,6 +174,9 @@ _dl_tlsdesc_dynamic:
 	stp	x3,  x4, [sp, #32+16*1]
 
 	mrs	x4, tpidr_el0
+	/* The ldar guarantees ordering between the load from [x0] at the
+	   call site and the load from [x0,#8] here for lazy relocation. */
+	ldar	xzr, [x0]
 	ldr	x1, [x0,#8]
 	ldr	x0, [x4]
 	ldr	x3, [x1,#16]
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
index 7a1285e..e6c0078 100644
--- a/sysdeps/aarch64/dl-tlsdesc.h
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -46,6 +46,9 @@ extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_return (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_return_lazy (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
 _dl_tlsdesc_undefweak (struct tlsdesc *);
 
 extern ptrdiff_t attribute_hidden
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 4821f8c..2e55c07 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -25,6 +25,7 @@
 #include <dl-tlsdesc.h>
 #include <dl-unmap-segments.h>
 #include <tlsdeschtab.h>
+#include <atomic.h>
 
 /* The following functions take an entry_check_offset argument.  It's
    computed by the caller as an offset between its entry point and the
@@ -87,6 +88,8 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
   if (!sym)
     {
       td->arg = (void*) reloc->r_addend;
+      /* Store-store barrier so the two writes are not reordered. */
+      atomic_thread_fence_release ();
       td->entry = _dl_tlsdesc_undefweak;
     }
   else
@@ -98,6 +101,8 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 	{
 	  td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
 					      + reloc->r_addend);
+	  /* Store-store barrier so the two writes are not reordered. */
+	  atomic_thread_fence_release ();
 	  td->entry = _dl_tlsdesc_dynamic;
 	}
       else
@@ -105,7 +110,9 @@ _dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
 	{
 	  td->arg = (void*) (sym->st_value + result->l_tls_offset
 			     + reloc->r_addend);
-	  td->entry = _dl_tlsdesc_return;
+	  /* Store-store barrier so the two writes are not reordered. */
+	  atomic_thread_fence_release ();
+	  td->entry = _dl_tlsdesc_return_lazy;
 	}
     }
 

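The reader side of the pairing is an acquire load: the ldar xzr, [x0]
in the stubs above gives the same load-load ordering that an acquire
load of td->entry would give in C11.  For completeness, a fully C11
version of both sides would look like the sketch below (names made
up; it needs an _Atomic entry field, which is exactly what the wiki's
plain-types convention avoids):

#include <stdatomic.h>

struct desc_c11
{
  void *(*_Atomic entry) (struct desc_c11 *);
  void *arg;
};

/* Writer: the release store publishes the earlier plain store to
   arg to any reader that acquire-loads entry.  */
static void
set_desc (struct desc_c11 *td, void *(*fn) (struct desc_c11 *),
          void *arg)
{
  td->arg = arg;
  atomic_store_explicit (&td->entry, fn, memory_order_release);
}

/* Reader: acquire-load entry before calling it, so the called
   function's load of arg cannot observe a stale value.  */
static void *
tls_access (struct desc_c11 *td)
{
  void *(*fn) (struct desc_c11 *)
    = atomic_load_explicit (&td->entry, memory_order_acquire);
  return fn (td);
}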