1 /* Thread-local storage handling in the ELF dynamic linker. Generic version.
2 Copyright (C) 2002-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
25 #include <sys/param.h>
36 #define TUNABLE_NAMESPACE rtld
37 #include <dl-tunables.h>
39 /* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
41 - IE TLS in libc.so for all dlmopen namespaces except in the initial
42 one where libc.so is not loaded dynamically but at startup time,
43 - IE TLS in other libraries which may be dynamically loaded even in the
45 - and optionally for optimizing dynamic TLS access.
47 The maximum number of namespaces is DL_NNS, but to support that many
48 namespaces correctly the static TLS allocation should be significantly
49 increased, which may cause problems with small thread stacks due to the
50 way static TLS is accounted (bug 11787).
52 So there is a rtld.nns tunable limit on the number of supported namespaces
53 that affects the size of the static TLS and by default it's small enough
54 not to cause problems with existing applications. The limit is not
55 enforced or checked: it is the user's responsibility to increase rtld.nns
56 if more dlmopen namespaces are used.
58 Audit modules use their own namespaces, they are not included in rtld.nns,
59 but come on top when computing the number of namespaces. */
61 /* Size of initial-exec TLS in libc.so. This should be the maximum of
62 observed PT_GNU_TLS sizes across all architectures. Some
63 architectures have lower values due to differences in type sizes
64 and link editor capabilities. */
65 #define LIBC_IE_TLS 144
67 /* Size of initial-exec TLS in libraries other than libc.so.
68 This should be large enough to cover runtime libraries of the
69 compiler such as libgomp and libraries in libc other than libc.so. */
70 #define OTHER_IE_TLS 144
/* Default number of namespaces.  Referenced by LEGACY_TLS below; the
   definition was lost in this copy of the file and is restored here.  */
#define DEFAULT_NNS 4

/* Default for dl_tls_static_optional.  */
#define OPTIONAL_TLS 512
/* Used to count the number of threads currently executing dynamic TLS
   updates.  Used to avoid recursive malloc calls in __tls_get_addr
   for an interposed malloc that uses global-dynamic TLS (which is not
   recommended); see _dl_tls_allocate_active checks.  This could be a
   per-thread flag, but would need TLS access in the dynamic linker.  */
unsigned int _dl_tls_threads_in_update;

/* Enter a dynamic TLS update region: bump the active-update count.  */
static void
_dl_tls_allocate_begin (void)
{
  atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, 1);
}

/* Leave a dynamic TLS update region.  */
static void
_dl_tls_allocate_end (void)
{
  atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, -1);
}

/* Return true if some thread is currently inside a TLS update region,
   i.e. a malloc call made now could re-enter __tls_get_addr.  */
static bool
_dl_tls_allocate_active (void)
{
  return atomic_load_relaxed (&_dl_tls_threads_in_update) > 0;
}
103 /* Compute the static TLS surplus based on the namespace count and the
104 TLS space that can be used for optimizations. */
106 tls_static_surplus (int nns
, int opt_tls
)
108 return (nns
- 1) * LIBC_IE_TLS
+ nns
* OTHER_IE_TLS
+ opt_tls
;
111 /* This value is chosen so that with default values for the tunables,
112 the computation of dl_tls_static_surplus in
113 _dl_tls_static_surplus_init yields the historic value 1664, for
114 backwards compatibility. */
115 #define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
117 /* Calculate the size of the static TLS surplus, when the given
118 number of audit modules are loaded. Must be called after the
119 number of audit modules is known and before static TLS allocation. */
121 _dl_tls_static_surplus_init (size_t naudit
)
125 nns
= TUNABLE_GET (nns
, size_t, NULL
);
126 opt_tls
= TUNABLE_GET (optional_static_tls
, size_t, NULL
);
129 if (DL_NNS
- nns
< naudit
)
130 _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
131 (unsigned long) naudit
, (unsigned long) (DL_NNS
- nns
));
134 GL(dl_tls_static_optional
) = opt_tls
;
135 assert (LEGACY_TLS
>= 0);
136 GLRO(dl_tls_static_surplus
) = tls_static_surplus (nns
, opt_tls
) + LEGACY_TLS
;
/* Out-of-memory handler: TLS allocation failure is fatal because
   callers cannot recover a partially set-up thread.  */
static void
__attribute__ ((__noreturn__))
oom (void)
{
  _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
}
149 _dl_assign_tls_modid (struct link_map
*l
)
153 if (__builtin_expect (GL(dl_tls_dtv_gaps
), false))
156 struct dtv_slotinfo_list
*runp
= GL(dl_tls_dtv_slotinfo_list
);
158 /* Note that this branch will never be executed during program
159 start since there are no gaps at that time. Therefore it
160 does not matter that the dl_tls_dtv_slotinfo is not allocated
161 yet when the function is called for the first times.
163 NB: the offset +1 is due to the fact that DTV[0] is used
164 for something else. */
165 result
= GL(dl_tls_static_nelem
) + 1;
166 if (result
<= GL(dl_tls_max_dtv_idx
))
169 while (result
- disp
< runp
->len
)
171 if (runp
->slotinfo
[result
- disp
].map
== NULL
)
175 assert (result
<= GL(dl_tls_max_dtv_idx
) + 1);
178 if (result
- disp
< runp
->len
)
180 /* Mark the entry as used, so any dependency see it. */
181 atomic_store_relaxed (&runp
->slotinfo
[result
- disp
].map
, l
);
182 atomic_store_relaxed (&runp
->slotinfo
[result
- disp
].gen
, 0);
188 while ((runp
= runp
->next
) != NULL
);
190 if (result
> GL(dl_tls_max_dtv_idx
))
192 /* The new index must indeed be exactly one higher than the
194 assert (result
== GL(dl_tls_max_dtv_idx
) + 1);
195 /* There is no gap anymore. */
196 GL(dl_tls_dtv_gaps
) = false;
203 /* No gaps, allocate a new entry. */
206 result
= GL(dl_tls_max_dtv_idx
) + 1;
207 /* Can be read concurrently. */
208 atomic_store_relaxed (&GL(dl_tls_max_dtv_idx
), result
);
211 l
->l_tls_modid
= result
;
216 _dl_count_modids (void)
218 /* The count is the max unless dlclose or failed dlopen created gaps. */
219 if (__glibc_likely (!GL(dl_tls_dtv_gaps
)))
220 return GL(dl_tls_max_dtv_idx
);
222 /* We have gaps and are forced to count the non-NULL entries. */
224 struct dtv_slotinfo_list
*runp
= GL(dl_tls_dtv_slotinfo_list
);
227 for (size_t i
= 0; i
< runp
->len
; ++i
)
228 if (runp
->slotinfo
[i
].map
!= NULL
)
240 _dl_determine_tlsoffset (void)
242 size_t max_align
= TCB_ALIGNMENT
;
244 size_t freebottom
= 0;
246 /* The first element of the dtv slot info list is allocated. */
247 assert (GL(dl_tls_dtv_slotinfo_list
) != NULL
);
248 /* There is at this point only one element in the
249 dl_tls_dtv_slotinfo_list list. */
250 assert (GL(dl_tls_dtv_slotinfo_list
)->next
== NULL
);
252 struct dtv_slotinfo
*slotinfo
= GL(dl_tls_dtv_slotinfo_list
)->slotinfo
;
254 /* Determining the offset of the various parts of the static TLS
255 block has several dependencies. In addition we have to work
256 around bugs in some toolchains.
258 Each TLS block from the objects available at link time has a size
259 and an alignment requirement. The GNU ld computes the alignment
260 requirements for the data at the positions *in the file*, though.
261 I.e, it is not simply possible to allocate a block with the size
262 of the TLS program header entry. The data is laid out assuming
263 that the first byte of the TLS block fulfills
265 p_vaddr mod p_align == &TLS_BLOCK mod p_align
267 This means we have to add artificial padding at the beginning of
268 the TLS block. These bytes are never used for the TLS data in
269 this module but the first byte allocated must be aligned
270 according to mod p_align == 0 so that the first byte of the TLS
271 block is aligned according to p_vaddr mod p_align. This is ugly
272 and the linker can help by computing the offsets in the TLS block
273 assuming the first byte of the TLS block is aligned according to
276 The extra space which might be allocated before the first byte of
277 the TLS block need not go unused. The code below tries to use
278 that memory for the next TLS block. This can work if the total
279 memory requirement for the next TLS block is smaller than the
283 /* We simply start with zero. */
286 for (size_t cnt
= 0; slotinfo
[cnt
].map
!= NULL
; ++cnt
)
288 assert (cnt
< GL(dl_tls_dtv_slotinfo_list
)->len
);
290 size_t firstbyte
= (-slotinfo
[cnt
].map
->l_tls_firstbyte_offset
291 & (slotinfo
[cnt
].map
->l_tls_align
- 1));
293 max_align
= MAX (max_align
, slotinfo
[cnt
].map
->l_tls_align
);
295 if (freebottom
- freetop
>= slotinfo
[cnt
].map
->l_tls_blocksize
)
297 off
= roundup (freetop
+ slotinfo
[cnt
].map
->l_tls_blocksize
298 - firstbyte
, slotinfo
[cnt
].map
->l_tls_align
)
300 if (off
<= freebottom
)
304 /* XXX For some architectures we perhaps should store the
306 slotinfo
[cnt
].map
->l_tls_offset
= off
;
311 off
= roundup (offset
+ slotinfo
[cnt
].map
->l_tls_blocksize
- firstbyte
,
312 slotinfo
[cnt
].map
->l_tls_align
) + firstbyte
;
313 if (off
> offset
+ slotinfo
[cnt
].map
->l_tls_blocksize
314 + (freebottom
- freetop
))
317 freebottom
= off
- slotinfo
[cnt
].map
->l_tls_blocksize
;
321 /* XXX For some architectures we perhaps should store the
323 slotinfo
[cnt
].map
->l_tls_offset
= off
;
326 GL(dl_tls_static_used
) = offset
;
327 GLRO (dl_tls_static_size
) = (roundup (offset
+ GLRO(dl_tls_static_surplus
),
331 /* The TLS blocks start right after the TCB. */
332 size_t offset
= TLS_TCB_SIZE
;
334 for (size_t cnt
= 0; slotinfo
[cnt
].map
!= NULL
; ++cnt
)
336 assert (cnt
< GL(dl_tls_dtv_slotinfo_list
)->len
);
338 size_t firstbyte
= (-slotinfo
[cnt
].map
->l_tls_firstbyte_offset
339 & (slotinfo
[cnt
].map
->l_tls_align
- 1));
341 max_align
= MAX (max_align
, slotinfo
[cnt
].map
->l_tls_align
);
343 if (slotinfo
[cnt
].map
->l_tls_blocksize
<= freetop
- freebottom
)
345 off
= roundup (freebottom
, slotinfo
[cnt
].map
->l_tls_align
);
346 if (off
- freebottom
< firstbyte
)
347 off
+= slotinfo
[cnt
].map
->l_tls_align
;
348 if (off
+ slotinfo
[cnt
].map
->l_tls_blocksize
- firstbyte
<= freetop
)
350 slotinfo
[cnt
].map
->l_tls_offset
= off
- firstbyte
;
351 freebottom
= (off
+ slotinfo
[cnt
].map
->l_tls_blocksize
357 off
= roundup (offset
, slotinfo
[cnt
].map
->l_tls_align
);
358 if (off
- offset
< firstbyte
)
359 off
+= slotinfo
[cnt
].map
->l_tls_align
;
361 slotinfo
[cnt
].map
->l_tls_offset
= off
- firstbyte
;
362 if (off
- firstbyte
- offset
> freetop
- freebottom
)
365 freetop
= off
- firstbyte
;
368 offset
= off
+ slotinfo
[cnt
].map
->l_tls_blocksize
- firstbyte
;
371 GL(dl_tls_static_used
) = offset
;
372 GLRO (dl_tls_static_size
) = roundup (offset
+ GLRO(dl_tls_static_surplus
),
375 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
378 /* The alignment requirement for the static TLS block. */
379 GLRO (dl_tls_static_align
) = max_align
;
384 allocate_dtv (void *result
)
389 /* Relaxed MO, because the dtv size is later rechecked, not relied on. */
390 size_t max_modid
= atomic_load_relaxed (&GL(dl_tls_max_dtv_idx
));
391 /* We allocate a few more elements in the dtv than are needed for the
392 initial set of modules. This should avoid in most cases expansions
394 dtv_length
= max_modid
+ DTV_SURPLUS
;
395 dtv
= calloc (dtv_length
+ 2, sizeof (dtv_t
));
398 /* This is the initial length of the dtv. */
399 dtv
[0].counter
= dtv_length
;
401 /* The rest of the dtv (including the generation counter) is
402 Initialize with zero to indicate nothing there. */
404 /* Add the dtv to the thread data structures. */
405 INSTALL_DTV (result
, dtv
);
413 /* Get size and alignment requirements of the static TLS block. This
414 function is no longer used by glibc itself, but the GCC sanitizers
415 use it despite the GLIBC_PRIVATE status. */
417 _dl_get_tls_static_info (size_t *sizep
, size_t *alignp
)
419 *sizep
= GLRO (dl_tls_static_size
);
420 *alignp
= GLRO (dl_tls_static_align
);
423 /* Derive the location of the pointer to the start of the original
424 allocation (before alignment) from the pointer to the TCB. */
425 static inline void **
426 tcb_to_pointer_to_free_location (void *tcb
)
429 /* The TCB follows the TLS blocks, and the pointer to the front
431 void **original_pointer_location
= tcb
+ TLS_TCB_SIZE
;
433 /* The TCB comes first, preceded by the pre-TCB, and the pointer is
435 void **original_pointer_location
= tcb
- TLS_PRE_TCB_SIZE
- sizeof (void *);
437 return original_pointer_location
;
441 _dl_allocate_tls_storage (void)
444 size_t size
= GLRO (dl_tls_static_size
);
448 [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
449 ^ This should be returned. */
450 size
+= TLS_PRE_TCB_SIZE
;
453 /* Reserve space for the required alignment and the pointer to the
454 original allocation. */
455 size_t alignment
= GLRO (dl_tls_static_align
);
457 /* Perform the allocation. */
458 _dl_tls_allocate_begin ();
459 void *allocated
= malloc (size
+ alignment
+ sizeof (void *));
460 if (__glibc_unlikely (allocated
== NULL
))
462 _dl_tls_allocate_end ();
466 /* Perform alignment and allocate the DTV. */
468 /* The TCB follows the TLS blocks, which determine the alignment.
469 (TCB alignment requirements have been taken into account when
470 calculating GLRO (dl_tls_static_align).) */
471 void *aligned
= (void *) roundup ((uintptr_t) allocated
, alignment
);
472 result
= aligned
+ size
- TLS_TCB_SIZE
;
474 /* Clear the TCB data structure. We can't ask the caller (i.e.
475 libpthread) to do it, because we will initialize the DTV et al. */
476 memset (result
, '\0', TLS_TCB_SIZE
);
478 /* Pre-TCB and TCB come before the TLS blocks. The layout computed
479 in _dl_determine_tlsoffset assumes that the TCB is aligned to the
480 TLS block alignment, and not just the TLS blocks after it. This
481 can leave an unused alignment gap between the TCB and the TLS
483 result
= (void *) roundup
484 (sizeof (void *) + TLS_PRE_TCB_SIZE
+ (uintptr_t) allocated
,
487 /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
488 it. We can't ask the caller (i.e. libpthread) to do it, because
489 we will initialize the DTV et al. */
490 memset (result
- TLS_PRE_TCB_SIZE
, '\0', TLS_PRE_TCB_SIZE
+ TLS_TCB_SIZE
);
493 /* Record the value of the original pointer for later
495 *tcb_to_pointer_to_free_location (result
) = allocated
;
497 result
= allocate_dtv (result
);
501 _dl_tls_allocate_end ();
507 extern dtv_t _dl_static_dtv
[];
508 # define _dl_initial_dtv (&_dl_static_dtv[1])
512 _dl_resize_dtv (dtv_t
*dtv
, size_t max_modid
)
514 /* Resize the dtv. */
516 size_t newsize
= max_modid
+ DTV_SURPLUS
;
517 size_t oldsize
= dtv
[-1].counter
;
519 _dl_tls_allocate_begin ();
520 if (dtv
== GL(dl_initial_dtv
))
522 /* This is the initial dtv that was either statically allocated in
523 __libc_setup_tls or allocated during rtld startup using the
524 dl-minimal.c malloc instead of the real malloc. We can't free
525 it, we have to abandon the old storage. */
527 newp
= malloc ((2 + newsize
) * sizeof (dtv_t
));
530 memcpy (newp
, &dtv
[-1], (2 + oldsize
) * sizeof (dtv_t
));
534 newp
= realloc (&dtv
[-1],
535 (2 + newsize
) * sizeof (dtv_t
));
539 _dl_tls_allocate_end ();
541 newp
[0].counter
= newsize
;
543 /* Clear the newly allocated part. */
544 memset (newp
+ 2 + oldsize
, '\0',
545 (newsize
- oldsize
) * sizeof (dtv_t
));
547 /* Return the generation counter. */
552 /* Allocate initial TLS. RESULT should be a non-NULL pointer to storage
553 for the TLS space. The DTV may be resized, and so this function may
554 call malloc to allocate that space. The loader's GL(dl_load_tls_lock)
555 is taken when manipulating global TLS-related data in the loader. */
557 _dl_allocate_tls_init (void *result
, bool init_tls
)
560 /* The memory allocation failed. */
563 dtv_t
*dtv
= GET_DTV (result
);
564 struct dtv_slotinfo_list
*listp
;
568 /* Protects global dynamic TLS related state. */
569 __rtld_lock_lock_recursive (GL(dl_load_tls_lock
));
571 /* Check if the current dtv is big enough. */
572 if (dtv
[-1].counter
< GL(dl_tls_max_dtv_idx
))
574 /* Resize the dtv. */
575 dtv
= _dl_resize_dtv (dtv
, GL(dl_tls_max_dtv_idx
));
577 /* Install this new dtv in the thread data structures. */
578 INSTALL_DTV (result
, &dtv
[-1]);
581 /* We have to prepare the dtv for all currently loaded modules using
582 TLS. For those which are dynamically loaded we add the values
583 indicating deferred allocation. */
584 listp
= GL(dl_tls_dtv_slotinfo_list
);
589 for (cnt
= total
== 0 ? 1 : 0; cnt
< listp
->len
; ++cnt
)
591 struct link_map
*map
;
594 /* Check for the total number of used slots. */
595 if (total
+ cnt
> GL(dl_tls_max_dtv_idx
))
598 map
= listp
->slotinfo
[cnt
].map
;
603 /* Keep track of the maximum generation number. This might
604 not be the generation counter. */
605 assert (listp
->slotinfo
[cnt
].gen
<= GL(dl_tls_generation
));
606 maxgen
= MAX (maxgen
, listp
->slotinfo
[cnt
].gen
);
608 dtv
[map
->l_tls_modid
].pointer
.val
= TLS_DTV_UNALLOCATED
;
609 dtv
[map
->l_tls_modid
].pointer
.to_free
= NULL
;
611 if (map
->l_tls_offset
== NO_TLS_OFFSET
612 || map
->l_tls_offset
== FORCED_DYNAMIC_TLS_OFFSET
)
615 assert (map
->l_tls_modid
== total
+ cnt
);
616 assert (map
->l_tls_blocksize
>= map
->l_tls_initimage_size
);
618 assert ((size_t) map
->l_tls_offset
>= map
->l_tls_blocksize
);
619 dest
= (char *) result
- map
->l_tls_offset
;
621 dest
= (char *) result
+ map
->l_tls_offset
;
623 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
626 /* Set up the DTV entry. The simplified __tls_get_addr that
627 some platforms use in static programs requires it. */
628 dtv
[map
->l_tls_modid
].pointer
.val
= dest
;
630 /* Copy the initialization image and clear the BSS part. For
631 audit modules or dependencies with initial-exec TLS, we can not
632 set the initial TLS image on default loader initialization
633 because it would already be set by the audit setup. However,
634 subsequent thread creation would need to follow the default
636 if (map
->l_ns
!= LM_ID_BASE
&& !init_tls
)
638 memset (__mempcpy (dest
, map
->l_tls_initimage
,
639 map
->l_tls_initimage_size
), '\0',
640 map
->l_tls_blocksize
- map
->l_tls_initimage_size
);
644 if (total
> GL(dl_tls_max_dtv_idx
))
648 assert (listp
!= NULL
);
650 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock
));
652 /* The DTV version is up-to-date now. */
653 dtv
[0].counter
= maxgen
;
657 rtld_hidden_def (_dl_allocate_tls_init
)
660 _dl_allocate_tls (void *mem
)
662 return _dl_allocate_tls_init (mem
== NULL
663 ? _dl_allocate_tls_storage ()
664 : allocate_dtv (mem
), true);
666 rtld_hidden_def (_dl_allocate_tls
)
670 _dl_deallocate_tls (void *tcb
, bool dealloc_tcb
)
672 dtv_t
*dtv
= GET_DTV (tcb
);
674 /* We need to free the memory allocated for non-static TLS. */
675 for (size_t cnt
= 0; cnt
< dtv
[-1].counter
; ++cnt
)
676 free (dtv
[1 + cnt
].pointer
.to_free
);
678 /* The array starts with dtv[-1]. */
679 if (dtv
!= GL(dl_initial_dtv
))
683 free (*tcb_to_pointer_to_free_location (tcb
));
685 rtld_hidden_def (_dl_deallocate_tls
)
/* The __tls_get_addr function has two basic forms which differ in the
   arguments.  The IA-64 form takes two parameters, the module ID and
   offset.  The form used, among others, on IA-32 takes a reference to
   a special structure which contain the same information.  The second
   form seems to be more often used (in the moment) so we default to
   it.  Users of the IA-64 form have to provide adequate definitions
   of the following macros.  */
# ifndef GET_ADDR_ARGS
#  define GET_ADDR_ARGS tls_index *ti
#  define GET_ADDR_PARAM ti
# endif
# ifndef GET_ADDR_MODULE
#  define GET_ADDR_MODULE ti->ti_module
# endif
# ifndef GET_ADDR_OFFSET
#  define GET_ADDR_OFFSET ti->ti_offset
# endif
707 /* Allocate one DTV entry. */
708 static struct dtv_pointer
709 allocate_dtv_entry (size_t alignment
, size_t size
)
711 if (powerof2 (alignment
) && alignment
<= _Alignof (max_align_t
))
713 /* The alignment is supported by malloc. */
714 _dl_tls_allocate_begin ();
715 void *ptr
= malloc (size
);
716 _dl_tls_allocate_end ();
717 return (struct dtv_pointer
) { ptr
, ptr
};
720 /* Emulate memalign to by manually aligning a pointer returned by
721 malloc. First compute the size with an overflow check. */
722 size_t alloc_size
= size
+ alignment
;
723 if (alloc_size
< size
)
724 return (struct dtv_pointer
) {};
726 /* Perform the allocation. This is the pointer we need to free
728 _dl_tls_allocate_begin ();
729 void *start
= malloc (alloc_size
);
730 _dl_tls_allocate_end ();
733 return (struct dtv_pointer
) {};
735 /* Find the aligned position within the larger allocation. */
736 void *aligned
= (void *) roundup ((uintptr_t) start
, alignment
);
738 return (struct dtv_pointer
) { .val
= aligned
, .to_free
= start
};
741 static struct dtv_pointer
742 allocate_and_init (struct link_map
*map
)
744 struct dtv_pointer result
= allocate_dtv_entry
745 (map
->l_tls_align
, map
->l_tls_blocksize
);
746 if (result
.val
== NULL
)
749 /* Initialize the memory. */
750 memset (__mempcpy (result
.val
, map
->l_tls_initimage
,
751 map
->l_tls_initimage_size
),
752 '\0', map
->l_tls_blocksize
- map
->l_tls_initimage_size
);
759 _dl_update_slotinfo (unsigned long int req_modid
, size_t new_gen
)
761 struct link_map
*the_map
= NULL
;
762 dtv_t
*dtv
= THREAD_DTV ();
764 /* CONCURRENCY NOTES:
766 The global dl_tls_dtv_slotinfo_list array contains for each module
767 index the generation counter current when that entry was updated.
768 This array never shrinks so that all module indices which were
769 valid at some time can be used to access it. Concurrent loading
770 and unloading of modules can update slotinfo entries or extend
771 the array. The updates happen under the GL(dl_load_tls_lock) and
772 finish with the release store of the generation counter to
773 GL(dl_tls_generation) which is synchronized with the load of
774 new_gen in the caller. So updates up to new_gen are synchronized
775 but updates for later generations may not be.
777 Here we update the thread dtv from old_gen (== dtv[0].counter) to
778 new_gen generation. For this, each dtv[i] entry is either set to
779 an unallocated state (set), or left unmodified (nop). Where (set)
780 may resize the dtv first if modid i >= dtv[-1].counter. The rules
781 for the decision between (set) and (nop) are
783 (1) If slotinfo entry i is concurrently updated then either (set)
784 or (nop) is valid: TLS access cannot use dtv[i] unless it is
785 synchronized with a generation > new_gen.
787 Otherwise, if the generation of slotinfo entry i is gen and the
788 loaded module for this entry is map then
790 (2) If gen <= old_gen then do (nop).
792 (3) If old_gen < gen <= new_gen then
793 (3.1) if map != 0 then (set)
794 (3.2) if map == 0 then either (set) or (nop).
796 Note that (1) cannot be reliably detected, but since both actions
797 are valid it does not have to be. Only (2) and (3.1) cases need
798 to be distinguished for which relaxed mo access of gen and map is
799 enough: their value is synchronized when it matters.
801 Note that a relaxed mo load may give an out-of-thin-air value since
802 it is used in decisions that can affect concurrent stores. But this
803 should only happen if the OOTA value causes UB that justifies the
804 concurrent store of the value. This is not expected to be an issue
806 struct dtv_slotinfo_list
*listp
= GL(dl_tls_dtv_slotinfo_list
);
808 if (dtv
[0].counter
< new_gen
)
811 size_t max_modid
= atomic_load_relaxed (&GL(dl_tls_max_dtv_idx
));
812 assert (max_modid
>= req_modid
);
814 /* We have to look through the entire dtv slotinfo list. */
815 listp
= GL(dl_tls_dtv_slotinfo_list
);
818 for (size_t cnt
= total
== 0 ? 1 : 0; cnt
< listp
->len
; ++cnt
)
820 size_t modid
= total
+ cnt
;
822 /* Case (1) for all later modids. */
823 if (modid
> max_modid
)
826 size_t gen
= atomic_load_relaxed (&listp
->slotinfo
[cnt
].gen
);
832 /* Case (2) or (1). */
833 if (gen
<= dtv
[0].counter
)
836 /* Case (3) or (1). */
838 /* If there is no map this means the entry is empty. */
840 = atomic_load_relaxed (&listp
->slotinfo
[cnt
].map
);
841 /* Check whether the current dtv array is large enough. */
842 if (dtv
[-1].counter
< modid
)
844 /* Case (3.2) or (1). */
848 /* Resizing the dtv aborts on failure: bug 16134. */
849 dtv
= _dl_resize_dtv (dtv
, max_modid
);
851 assert (modid
<= dtv
[-1].counter
);
853 /* Install this new dtv in the thread data
855 INSTALL_NEW_DTV (dtv
);
858 /* If there is currently memory allocate for this
859 dtv entry free it. Note: this is not AS-safe. */
860 /* XXX Ideally we will at some point create a memory
862 /* Avoid calling free on a null pointer. Some mallocs
863 incorrectly use dynamic TLS, and depending on how the
864 free function was compiled, it could call
865 __tls_get_addr before the null pointer check in the
866 free implementation. Checking here papers over at
867 least some dynamic TLS usage by interposed mallocs. */
868 if (dtv
[modid
].pointer
.to_free
!= NULL
)
870 _dl_tls_allocate_begin ();
871 free (dtv
[modid
].pointer
.to_free
);
872 _dl_tls_allocate_end ();
874 dtv
[modid
].pointer
.val
= TLS_DTV_UNALLOCATED
;
875 dtv
[modid
].pointer
.to_free
= NULL
;
877 if (modid
== req_modid
)
882 if (total
> max_modid
)
885 /* Synchronize with _dl_add_to_slotinfo. Ideally this would
886 be consume MO since we only need to order the accesses to
887 the next node after the read of the address and on most
888 hardware (other than alpha) a normal load would do that
889 because of the address dependency. */
890 listp
= atomic_load_acquire (&listp
->next
);
892 while (listp
!= NULL
);
894 /* This will be the new maximum generation counter. */
895 dtv
[0].counter
= new_gen
;
903 __attribute_noinline__
904 tls_get_addr_tail (GET_ADDR_ARGS
, dtv_t
*dtv
, struct link_map
*the_map
)
906 /* The allocation was deferred. Do it now. */
909 /* Find the link map for this module. */
910 size_t idx
= GET_ADDR_MODULE
;
911 struct dtv_slotinfo_list
*listp
= GL(dl_tls_dtv_slotinfo_list
);
913 while (idx
>= listp
->len
)
919 the_map
= listp
->slotinfo
[idx
].map
;
922 /* Make sure that, if a dlopen running in parallel forces the
923 variable into static storage, we'll wait until the address in the
924 static TLS block is set up, and use that. If we're undecided
925 yet, make sure we make the decision holding the lock as well. */
926 if (__glibc_unlikely (the_map
->l_tls_offset
927 != FORCED_DYNAMIC_TLS_OFFSET
))
929 __rtld_lock_lock_recursive (GL(dl_load_tls_lock
));
930 if (__glibc_likely (the_map
->l_tls_offset
== NO_TLS_OFFSET
))
932 the_map
->l_tls_offset
= FORCED_DYNAMIC_TLS_OFFSET
;
933 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock
));
935 else if (__glibc_likely (the_map
->l_tls_offset
936 != FORCED_DYNAMIC_TLS_OFFSET
))
939 void *p
= (char *) THREAD_SELF
- the_map
->l_tls_offset
;
941 void *p
= (char *) THREAD_SELF
+ the_map
->l_tls_offset
+ TLS_PRE_TCB_SIZE
;
943 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
945 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock
));
947 dtv
[GET_ADDR_MODULE
].pointer
.to_free
= NULL
;
948 dtv
[GET_ADDR_MODULE
].pointer
.val
= p
;
950 return (char *) p
+ GET_ADDR_OFFSET
;
953 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock
));
955 struct dtv_pointer result
= allocate_and_init (the_map
);
956 dtv
[GET_ADDR_MODULE
].pointer
= result
;
957 assert (result
.to_free
!= NULL
);
959 return (char *) result
.val
+ GET_ADDR_OFFSET
;
963 static struct link_map
*
964 __attribute_noinline__
965 update_get_addr (GET_ADDR_ARGS
, size_t gen
)
967 struct link_map
*the_map
= _dl_update_slotinfo (GET_ADDR_MODULE
, gen
);
968 dtv_t
*dtv
= THREAD_DTV ();
970 void *p
= dtv
[GET_ADDR_MODULE
].pointer
.val
;
972 if (__glibc_unlikely (p
== TLS_DTV_UNALLOCATED
))
973 return tls_get_addr_tail (GET_ADDR_PARAM
, dtv
, the_map
);
975 return (void *) p
+ GET_ADDR_OFFSET
;
978 /* For all machines that have a non-macro version of __tls_get_addr, we
979 want to use rtld_hidden_proto/rtld_hidden_def in order to call the
980 internal alias for __tls_get_addr from ld.so. This avoids a PLT entry
981 in ld.so for __tls_get_addr. */
983 #ifndef __tls_get_addr
984 extern void * __tls_get_addr (GET_ADDR_ARGS
);
985 rtld_hidden_proto (__tls_get_addr
)
986 rtld_hidden_def (__tls_get_addr
)
989 /* The generic dynamic and local dynamic model cannot be used in
990 statically linked applications. */
992 __tls_get_addr (GET_ADDR_ARGS
)
994 dtv_t
*dtv
= THREAD_DTV ();
996 /* Update is needed if dtv[0].counter < the generation of the accessed
997 module, but the global generation counter is easier to check (which
998 must be synchronized up to the generation of the accessed module by
999 user code doing the TLS access so relaxed mo read is enough). */
1000 size_t gen
= atomic_load_relaxed (&GL(dl_tls_generation
));
1001 if (__glibc_unlikely (dtv
[0].counter
!= gen
))
1003 if (_dl_tls_allocate_active ()
1004 && GET_ADDR_MODULE
< _dl_tls_initial_modid_limit
)
1005 /* This is a reentrant __tls_get_addr call, but we can
1006 satisfy it because it's an initially-loaded module ID.
1007 These TLS slotinfo slots do not change, so the
1008 out-of-date generation counter does not matter. However,
1009 if not in a TLS update, still update_get_addr below, to
1010 get off the slow path eventually. */
1014 /* Update DTV up to the global generation, see CONCURRENCY NOTES
1015 in _dl_update_slotinfo. */
1016 gen
= atomic_load_acquire (&GL(dl_tls_generation
));
1017 return update_get_addr (GET_ADDR_PARAM
, gen
);
1021 void *p
= dtv
[GET_ADDR_MODULE
].pointer
.val
;
1023 if (__glibc_unlikely (p
== TLS_DTV_UNALLOCATED
))
1024 return tls_get_addr_tail (GET_ADDR_PARAM
, dtv
, NULL
);
1026 return (char *) p
+ GET_ADDR_OFFSET
;
1031 /* Look up the module's TLS block as for __tls_get_addr,
1032 but never touch anything. Return null if it's not allocated yet. */
1034 _dl_tls_get_addr_soft (struct link_map
*l
)
1036 if (__glibc_unlikely (l
->l_tls_modid
== 0))
1037 /* This module has no TLS segment. */
1040 dtv_t
*dtv
= THREAD_DTV ();
1041 /* This may be called without holding the GL(dl_load_tls_lock). Reading
1042 arbitrary gen value is fine since this is best effort code. */
1043 size_t gen
= atomic_load_relaxed (&GL(dl_tls_generation
));
1044 if (__glibc_unlikely (dtv
[0].counter
!= gen
))
1046 /* This thread's DTV is not completely current,
1047 but it might already cover this module. */
1049 if (l
->l_tls_modid
>= dtv
[-1].counter
)
1053 size_t idx
= l
->l_tls_modid
;
1054 struct dtv_slotinfo_list
*listp
= GL(dl_tls_dtv_slotinfo_list
);
1055 while (idx
>= listp
->len
)
1058 listp
= listp
->next
;
1061 /* We've reached the slot for this module.
1062 If its generation counter is higher than the DTV's,
1063 this thread does not know about this module yet. */
1064 if (dtv
[0].counter
< listp
->slotinfo
[idx
].gen
)
1068 void *data
= dtv
[l
->l_tls_modid
].pointer
.val
;
1069 if (__glibc_unlikely (data
== TLS_DTV_UNALLOCATED
))
1070 /* The DTV is current, but this thread has not yet needed
1071 to allocate this module's segment. */
1077 size_t _dl_tls_initial_modid_limit
;
1080 _dl_tls_initial_modid_limit_setup (void)
1082 struct dtv_slotinfo_list
*listp
= GL(dl_tls_dtv_slotinfo_list
);
1084 for (idx
= 0; idx
< listp
->len
; ++idx
)
1086 struct link_map
*l
= listp
->slotinfo
[idx
].map
;
1088 /* The object can be unloaded, so its modid can be
1090 || !(l
->l_type
== lt_executable
|| l
->l_type
== lt_library
))
1093 _dl_tls_initial_modid_limit
= idx
;
1098 _dl_add_to_slotinfo (struct link_map
*l
, bool do_add
)
1100 /* Now that we know the object is loaded successfully add
1101 modules containing TLS data to the dtv info table. We
1102 might have to increase its size. */
1103 struct dtv_slotinfo_list
*listp
;
1104 struct dtv_slotinfo_list
*prevp
;
1105 size_t idx
= l
->l_tls_modid
;
1107 /* Find the place in the dtv slotinfo list. */
1108 listp
= GL(dl_tls_dtv_slotinfo_list
);
1109 prevp
= NULL
; /* Needed to shut up gcc. */
1112 /* Does it fit in the array of this list element? */
1113 if (idx
< listp
->len
)
1117 listp
= listp
->next
;
1119 while (listp
!= NULL
);
1123 /* When we come here it means we have to add a new element
1124 to the slotinfo list. And the new module must be in
1128 _dl_tls_allocate_begin ();
1129 listp
= (struct dtv_slotinfo_list
*)
1130 malloc (sizeof (struct dtv_slotinfo_list
)
1131 + TLS_SLOTINFO_SURPLUS
* sizeof (struct dtv_slotinfo
));
1132 _dl_tls_allocate_end ();
1135 /* We ran out of memory while resizing the dtv slotinfo list. */
1136 _dl_signal_error (ENOMEM
, "dlopen", NULL
, N_("\
1137 cannot create TLS data structures"));
1140 listp
->len
= TLS_SLOTINFO_SURPLUS
;
1142 memset (listp
->slotinfo
, '\0',
1143 TLS_SLOTINFO_SURPLUS
* sizeof (struct dtv_slotinfo
));
1144 /* Synchronize with _dl_update_slotinfo. */
1145 atomic_store_release (&prevp
->next
, listp
);
1148 /* Add the information into the slotinfo data structure. */
1151 /* Can be read concurrently. See _dl_update_slotinfo. */
1152 atomic_store_relaxed (&listp
->slotinfo
[idx
].map
, l
);
1153 atomic_store_relaxed (&listp
->slotinfo
[idx
].gen
,
1154 GL(dl_tls_generation
) + 1);
1159 static inline void __attribute__((always_inline
))
1160 init_one_static_tls (struct pthread
*curp
, struct link_map
*map
)
1163 void *dest
= (char *) curp
- map
->l_tls_offset
;
1164 # elif TLS_DTV_AT_TP
1165 void *dest
= (char *) curp
+ map
->l_tls_offset
+ TLS_PRE_TCB_SIZE
;
1167 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1170 /* Initialize the memory. */
1171 memset (__mempcpy (dest
, map
->l_tls_initimage
, map
->l_tls_initimage_size
),
1172 '\0', map
->l_tls_blocksize
- map
->l_tls_initimage_size
);
1176 _dl_init_static_tls (struct link_map
*map
)
1178 lll_lock (GL (dl_stack_cache_lock
), LLL_PRIVATE
);
1180 /* Iterate over the list with system-allocated threads first. */
1182 list_for_each (runp
, &GL (dl_stack_used
))
1183 init_one_static_tls (list_entry (runp
, struct pthread
, list
), map
);
1185 /* Now the list with threads using user-allocated stacks. */
1186 list_for_each (runp
, &GL (dl_stack_user
))
1187 init_one_static_tls (list_entry (runp
, struct pthread
, list
), map
);
1189 lll_unlock (GL (dl_stack_cache_lock
), LLL_PRIVATE
);
1191 #endif /* PTHREAD_IN_LIBC */