[glibc.git] / elf / dl-tls.c
1 /* Thread-local storage handling in the ELF dynamic linker. Generic version.
2 Copyright (C) 2002-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <libintl.h>
22 #include <signal.h>
23 #include <stdlib.h>
24 #include <unistd.h>
25 #include <sys/param.h>
26 #include <atomic.h>
27
28 #include <tls.h>
29 #include <dl-tls.h>
30 #include <ldsodefs.h>
31
32 #if PTHREAD_IN_LIBC
33 # include <list.h>
34 #endif
35
36 #define TUNABLE_NAMESPACE rtld
37 #include <dl-tunables.h>
38
39 /* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
40
41 - IE TLS in libc.so for all dlmopen namespaces except in the initial
42 one where libc.so is not loaded dynamically but at startup time,
43 - IE TLS in other libraries which may be dynamically loaded even in the
44 initial namespace,
45 - and optionally for optimizing dynamic TLS access.
46
47 The maximum number of namespaces is DL_NNS, but to support that many
48 namespaces correctly the static TLS allocation should be significantly
49 increased, which may cause problems with small thread stacks due to the
50 way static TLS is accounted (bug 11787).
51
52 So there is a rtld.nns tunable limit on the number of supported namespaces
53 that affects the size of the static TLS and by default it's small enough
54 not to cause problems with existing applications. The limit is not
55 enforced or checked: it is the user's responsibility to increase rtld.nns
56 if more dlmopen namespaces are used.
57
58 Audit modules use their own namespaces, they are not included in rtld.nns,
59 but come on top when computing the number of namespaces. */
60
61 /* Size of initial-exec TLS in libc.so. This should be the maximum of
62    observed PT_TLS sizes across all architectures.  Some
63 architectures have lower values due to differences in type sizes
64 and link editor capabilities. */
65 #define LIBC_IE_TLS 144
66
67 /* Size of initial-exec TLS in libraries other than libc.so.
68 This should be large enough to cover runtime libraries of the
69 compiler such as libgomp and libraries in libc other than libc.so. */
70 #define OTHER_IE_TLS 144
71
72 /* Default number of namespaces. */
73 #define DEFAULT_NNS 4
74
75 /* Default for dl_tls_static_optional. */
76 #define OPTIONAL_TLS 512
77
78 /* Used to count the number of threads currently executing dynamic TLS
79 updates. Used to avoid recursive malloc calls in __tls_get_addr
80 for an interposed malloc that uses global-dynamic TLS (which is not
81 recommended); see _dl_tls_allocate_active checks. This could be a
82 per-thread flag, but would need TLS access in the dynamic linker. */
83 unsigned int _dl_tls_threads_in_update;
84
85 static inline void
86 _dl_tls_allocate_begin (void)
87 {
88 atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, 1);
89 }
90
91 static inline void
92 _dl_tls_allocate_end (void)
93 {
94 atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, -1);
95 }
96
97 static inline bool
98 _dl_tls_allocate_active (void)
99 {
100 return atomic_load_relaxed (&_dl_tls_threads_in_update) > 0;
101 }
102
103 /* Compute the static TLS surplus based on the namespace count and the
104 TLS space that can be used for optimizations. */
105 static inline int
106 tls_static_surplus (int nns, int opt_tls)
107 {
108 return (nns - 1) * LIBC_IE_TLS + nns * OTHER_IE_TLS + opt_tls;
109 }
110
111 /* This value is chosen so that with default values for the tunables,
112 the computation of dl_tls_static_surplus in
113 _dl_tls_static_surplus_init yields the historic value 1664, for
114 backwards compatibility. */
115 #define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
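/* As a worked example with the default tunables (rtld.nns == 4,
   rtld.optional_static_tls == 512) and no audit modules:
   tls_static_surplus (4, 512) == 3 * 144 + 4 * 144 + 512 == 1520,
   so LEGACY_TLS == 1664 - 1520 == 144 and _dl_tls_static_surplus_init
   ends up storing 1520 + 144 == 1664, the historic surplus value.  */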
116
117 /* Calculate the size of the static TLS surplus, when the given
118 number of audit modules are loaded. Must be called after the
119 number of audit modules is known and before static TLS allocation. */
120 void
121 _dl_tls_static_surplus_init (size_t naudit)
122 {
123 size_t nns, opt_tls;
124
125 nns = TUNABLE_GET (nns, size_t, NULL);
126 opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
127 if (nns > DL_NNS)
128 nns = DL_NNS;
129 if (DL_NNS - nns < naudit)
130 _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
131 (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
132 nns += naudit;
133
134 GL(dl_tls_static_optional) = opt_tls;
135 assert (LEGACY_TLS >= 0);
136 GLRO(dl_tls_static_surplus) = tls_static_surplus (nns, opt_tls) + LEGACY_TLS;
137 }
138
139 /* Out-of-memory handler. */
140 static void
141 __attribute__ ((__noreturn__))
142 oom (void)
143 {
144 _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
145 }
146
147
148 void
149 _dl_assign_tls_modid (struct link_map *l)
150 {
151 size_t result;
152
153 if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
154 {
155 size_t disp = 0;
156 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
157
158 /* Note that this branch will never be executed during program
159 start since there are no gaps at that time. Therefore it
160          does not matter that the dl_tls_dtv_slotinfo_list is not
161          allocated yet when the function is called for the first time.
162
163 NB: the offset +1 is due to the fact that DTV[0] is used
164 for something else. */
165 result = GL(dl_tls_static_nelem) + 1;
166 if (result <= GL(dl_tls_max_dtv_idx))
167 do
168 {
169 while (result - disp < runp->len)
170 {
171 if (runp->slotinfo[result - disp].map == NULL)
172 break;
173
174 ++result;
175 assert (result <= GL(dl_tls_max_dtv_idx) + 1);
176 }
177
178 if (result - disp < runp->len)
179 {
180                 /* Mark the entry as used, so any dependency sees it.  */
181 atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
182 atomic_store_relaxed (&runp->slotinfo[result - disp].gen, 0);
183 break;
184 }
185
186 disp += runp->len;
187 }
188 while ((runp = runp->next) != NULL);
189
190 if (result > GL(dl_tls_max_dtv_idx))
191 {
192 /* The new index must indeed be exactly one higher than the
193 previous high. */
194 assert (result == GL(dl_tls_max_dtv_idx) + 1);
195 /* There is no gap anymore. */
196 GL(dl_tls_dtv_gaps) = false;
197
198 goto nogaps;
199 }
200 }
201 else
202 {
203 /* No gaps, allocate a new entry. */
204 nogaps:
205
206 result = GL(dl_tls_max_dtv_idx) + 1;
207 /* Can be read concurrently. */
208 atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
209 }
210
211 l->l_tls_modid = result;
212 }
213
214
215 size_t
216 _dl_count_modids (void)
217 {
218 /* The count is the max unless dlclose or failed dlopen created gaps. */
219 if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
220 return GL(dl_tls_max_dtv_idx);
221
222 /* We have gaps and are forced to count the non-NULL entries. */
223 size_t n = 0;
224 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
225 while (runp != NULL)
226 {
227 for (size_t i = 0; i < runp->len; ++i)
228 if (runp->slotinfo[i].map != NULL)
229 ++n;
230
231 runp = runp->next;
232 }
233
234 return n;
235 }
236
237
238 #ifdef SHARED
239 void
240 _dl_determine_tlsoffset (void)
241 {
242 size_t max_align = TCB_ALIGNMENT;
243 size_t freetop = 0;
244 size_t freebottom = 0;
245
246 /* The first element of the dtv slot info list is allocated. */
247 assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
248 /* There is at this point only one element in the
249 dl_tls_dtv_slotinfo_list list. */
250 assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
251
252 struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
253
254 /* Determining the offset of the various parts of the static TLS
255 block has several dependencies. In addition we have to work
256 around bugs in some toolchains.
257
258 Each TLS block from the objects available at link time has a size
259 and an alignment requirement. The GNU ld computes the alignment
260 requirements for the data at the positions *in the file*, though.
261      I.e., it is not possible to simply allocate a block with the size
262 of the TLS program header entry. The data is laid out assuming
263 that the first byte of the TLS block fulfills
264
265 p_vaddr mod p_align == &TLS_BLOCK mod p_align
266
267 This means we have to add artificial padding at the beginning of
268 the TLS block. These bytes are never used for the TLS data in
269 this module but the first byte allocated must be aligned
270 according to mod p_align == 0 so that the first byte of the TLS
271 block is aligned according to p_vaddr mod p_align. This is ugly
272 and the linker can help by computing the offsets in the TLS block
273 assuming the first byte of the TLS block is aligned according to
274 p_align.
275
276 The extra space which might be allocated before the first byte of
277 the TLS block need not go unused. The code below tries to use
278 that memory for the next TLS block. This can work if the total
279 memory requirement for the next TLS block is smaller than the
280 gap. */
281
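  /* A concrete illustration of the firstbyte computation below, for a
     hypothetical module in the TLS_TCB_AT_TP case: with p_align == 16
     and p_vaddr % 16 == 12, firstbyte becomes (-12) & 15 == 4, the
     chosen offset is congruent to 4 mod 16, and the first byte of the
     block at tp - offset is congruent to 12 mod 16, matching
     p_vaddr mod p_align as required.  */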
282 #if TLS_TCB_AT_TP
283 /* We simply start with zero. */
284 size_t offset = 0;
285
286 for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
287 {
288 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
289
290 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
291 & (slotinfo[cnt].map->l_tls_align - 1));
292 size_t off;
293 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
294
295 if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
296 {
297 off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
298 - firstbyte, slotinfo[cnt].map->l_tls_align)
299 + firstbyte;
300 if (off <= freebottom)
301 {
302 freetop = off;
303
304 /* XXX For some architectures we perhaps should store the
305 negative offset. */
306 slotinfo[cnt].map->l_tls_offset = off;
307 continue;
308 }
309 }
310
311 off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
312 slotinfo[cnt].map->l_tls_align) + firstbyte;
313 if (off > offset + slotinfo[cnt].map->l_tls_blocksize
314 + (freebottom - freetop))
315 {
316 freetop = offset;
317 freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
318 }
319 offset = off;
320
321 /* XXX For some architectures we perhaps should store the
322 negative offset. */
323 slotinfo[cnt].map->l_tls_offset = off;
324 }
325
326 GL(dl_tls_static_used) = offset;
327 GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
328 max_align)
329 + TLS_TCB_SIZE);
330 #elif TLS_DTV_AT_TP
331 /* The TLS blocks start right after the TCB. */
332 size_t offset = TLS_TCB_SIZE;
333
334 for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
335 {
336 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
337
338 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
339 & (slotinfo[cnt].map->l_tls_align - 1));
340 size_t off;
341 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
342
343 if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
344 {
345 off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
346 if (off - freebottom < firstbyte)
347 off += slotinfo[cnt].map->l_tls_align;
348 if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
349 {
350 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
351 freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
352 - firstbyte);
353 continue;
354 }
355 }
356
357 off = roundup (offset, slotinfo[cnt].map->l_tls_align);
358 if (off - offset < firstbyte)
359 off += slotinfo[cnt].map->l_tls_align;
360
361 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
362 if (off - firstbyte - offset > freetop - freebottom)
363 {
364 freebottom = offset;
365 freetop = off - firstbyte;
366 }
367
368 offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
369 }
370
371 GL(dl_tls_static_used) = offset;
372 GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
373 TCB_ALIGNMENT);
374 #else
375 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
376 #endif
377
378 /* The alignment requirement for the static TLS block. */
379 GLRO (dl_tls_static_align) = max_align;
380 }
381 #endif /* SHARED */
382
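/* Allocate the DTV for the thread whose TCB is at RESULT and install it
   there.  The calloc below reserves two extra elements in front of the
   entries that are handed out: the first records the usable length
   (later read back as dtv[-1].counter) and the second holds the
   generation counter (dtv[0].counter); per-module entries start at
   index 1.  Returns RESULT, or NULL if the allocation failed.  */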
383 static void *
384 allocate_dtv (void *result)
385 {
386 dtv_t *dtv;
387 size_t dtv_length;
388
389 /* Relaxed MO, because the dtv size is later rechecked, not relied on. */
390 size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
391 /* We allocate a few more elements in the dtv than are needed for the
392      initial set of modules.  This should avoid expansions of the dtv
393      in most cases.  */
394 dtv_length = max_modid + DTV_SURPLUS;
395 dtv = calloc (dtv_length + 2, sizeof (dtv_t));
396 if (dtv != NULL)
397 {
398 /* This is the initial length of the dtv. */
399 dtv[0].counter = dtv_length;
400
401 /* The rest of the dtv (including the generation counter) is
402          initialized to zero by calloc to indicate nothing is there.  */
403
404 /* Add the dtv to the thread data structures. */
405 INSTALL_DTV (result, dtv);
406 }
407 else
408 result = NULL;
409
410 return result;
411 }
412
413 /* Get size and alignment requirements of the static TLS block. This
414 function is no longer used by glibc itself, but the GCC sanitizers
415 use it despite the GLIBC_PRIVATE status. */
416 void
417 _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
418 {
419 *sizep = GLRO (dl_tls_static_size);
420 *alignp = GLRO (dl_tls_static_align);
421 }
422
423 /* Derive the location of the pointer to the start of the original
424 allocation (before alignment) from the pointer to the TCB. */
425 static inline void **
426 tcb_to_pointer_to_free_location (void *tcb)
427 {
428 #if TLS_TCB_AT_TP
429 /* The TCB follows the TLS blocks, and the pointer to the front
430 follows the TCB. */
431 void **original_pointer_location = tcb + TLS_TCB_SIZE;
432 #elif TLS_DTV_AT_TP
433 /* The TCB comes first, preceded by the pre-TCB, and the pointer is
434 before that. */
435 void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
436 #endif
437 return original_pointer_location;
438 }
439
440 void *
441 _dl_allocate_tls_storage (void)
442 {
443 void *result;
444 size_t size = GLRO (dl_tls_static_size);
445
446 #if TLS_DTV_AT_TP
447 /* Memory layout is:
448 [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
449 ^ This should be returned. */
450 size += TLS_PRE_TCB_SIZE;
451 #endif
452
453 /* Reserve space for the required alignment and the pointer to the
454 original allocation. */
455 size_t alignment = GLRO (dl_tls_static_align);
456
457 /* Perform the allocation. */
458 _dl_tls_allocate_begin ();
459 void *allocated = malloc (size + alignment + sizeof (void *));
460 if (__glibc_unlikely (allocated == NULL))
461 {
462 _dl_tls_allocate_end ();
463 return NULL;
464 }
465
466 /* Perform alignment and allocate the DTV. */
467 #if TLS_TCB_AT_TP
468 /* The TCB follows the TLS blocks, which determine the alignment.
469 (TCB alignment requirements have been taken into account when
470 calculating GLRO (dl_tls_static_align).) */
471 void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
472 result = aligned + size - TLS_TCB_SIZE;
473
474 /* Clear the TCB data structure. We can't ask the caller (i.e.
475 libpthread) to do it, because we will initialize the DTV et al. */
476 memset (result, '\0', TLS_TCB_SIZE);
477 #elif TLS_DTV_AT_TP
478 /* Pre-TCB and TCB come before the TLS blocks. The layout computed
479 in _dl_determine_tlsoffset assumes that the TCB is aligned to the
480 TLS block alignment, and not just the TLS blocks after it. This
481 can leave an unused alignment gap between the TCB and the TLS
482 blocks. */
483 result = (void *) roundup
484 (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
485 alignment);
486
487 /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
488 it. We can't ask the caller (i.e. libpthread) to do it, because
489 we will initialize the DTV et al. */
490 memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
491 #endif
492
493 /* Record the value of the original pointer for later
494 deallocation. */
495 *tcb_to_pointer_to_free_location (result) = allocated;
496
497 result = allocate_dtv (result);
498 if (result == NULL)
499 free (allocated);
500
501 _dl_tls_allocate_end ();
502 return result;
503 }
504
505
506 #ifndef SHARED
507 extern dtv_t _dl_static_dtv[];
508 # define _dl_initial_dtv (&_dl_static_dtv[1])
509 #endif
510
511 static dtv_t *
512 _dl_resize_dtv (dtv_t *dtv, size_t max_modid)
513 {
514 /* Resize the dtv. */
515 dtv_t *newp;
516 size_t newsize = max_modid + DTV_SURPLUS;
517 size_t oldsize = dtv[-1].counter;
518
519 _dl_tls_allocate_begin ();
520 if (dtv == GL(dl_initial_dtv))
521 {
522 /* This is the initial dtv that was either statically allocated in
523 __libc_setup_tls or allocated during rtld startup using the
524 dl-minimal.c malloc instead of the real malloc. We can't free
525 it, we have to abandon the old storage. */
526
527 newp = malloc ((2 + newsize) * sizeof (dtv_t));
528 if (newp == NULL)
529 oom ();
530 memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
531 }
532 else
533 {
534 newp = realloc (&dtv[-1],
535 (2 + newsize) * sizeof (dtv_t));
536 if (newp == NULL)
537 oom ();
538 }
539 _dl_tls_allocate_end ();
540
541 newp[0].counter = newsize;
542
543 /* Clear the newly allocated part. */
544 memset (newp + 2 + oldsize, '\0',
545 (newsize - oldsize) * sizeof (dtv_t));
546
547 /* Return the generation counter. */
548 return &newp[1];
549 }
550
551
552 /* Allocate initial TLS. RESULT should be a non-NULL pointer to storage
553 for the TLS space. The DTV may be resized, and so this function may
554 call malloc to allocate that space. The loader's GL(dl_load_tls_lock)
555 is taken when manipulating global TLS-related data in the loader. */
556 void *
557 _dl_allocate_tls_init (void *result, bool init_tls)
558 {
559 if (result == NULL)
560 /* The memory allocation failed. */
561 return NULL;
562
563 dtv_t *dtv = GET_DTV (result);
564 struct dtv_slotinfo_list *listp;
565 size_t total = 0;
566 size_t maxgen = 0;
567
568 /* Protects global dynamic TLS related state. */
569 __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
570
571 /* Check if the current dtv is big enough. */
572 if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
573 {
574 /* Resize the dtv. */
575 dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));
576
577 /* Install this new dtv in the thread data structures. */
578 INSTALL_DTV (result, &dtv[-1]);
579 }
580
581 /* We have to prepare the dtv for all currently loaded modules using
582 TLS. For those which are dynamically loaded we add the values
583 indicating deferred allocation. */
584 listp = GL(dl_tls_dtv_slotinfo_list);
585 while (1)
586 {
587 size_t cnt;
588
589 for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
590 {
591 struct link_map *map;
592 void *dest;
593
594 /* Check for the total number of used slots. */
595 if (total + cnt > GL(dl_tls_max_dtv_idx))
596 break;
597
598 map = listp->slotinfo[cnt].map;
599 if (map == NULL)
600 /* Unused entry. */
601 continue;
602
603 /* Keep track of the maximum generation number. This might
604 not be the generation counter. */
605 assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
606 maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
607
608 dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
609 dtv[map->l_tls_modid].pointer.to_free = NULL;
610
611 if (map->l_tls_offset == NO_TLS_OFFSET
612 || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
613 continue;
614
615 assert (map->l_tls_modid == total + cnt);
616 assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
617 #if TLS_TCB_AT_TP
618 assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
619 dest = (char *) result - map->l_tls_offset;
620 #elif TLS_DTV_AT_TP
621 dest = (char *) result + map->l_tls_offset;
622 #else
623 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
624 #endif
625
626 /* Set up the DTV entry. The simplified __tls_get_addr that
627 some platforms use in static programs requires it. */
628 dtv[map->l_tls_modid].pointer.val = dest;
629
630 /* Copy the initialization image and clear the BSS part. For
631 audit modules or dependencies with initial-exec TLS, we can not
632 set the initial TLS image on default loader initialization
633 because it would already be set by the audit setup. However,
634 subsequent thread creation would need to follow the default
635 behaviour. */
636 if (map->l_ns != LM_ID_BASE && !init_tls)
637 continue;
638 memset (__mempcpy (dest, map->l_tls_initimage,
639 map->l_tls_initimage_size), '\0',
640 map->l_tls_blocksize - map->l_tls_initimage_size);
641 }
642
643 total += cnt;
644 if (total > GL(dl_tls_max_dtv_idx))
645 break;
646
647 listp = listp->next;
648 assert (listp != NULL);
649 }
650 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
651
652 /* The DTV version is up-to-date now. */
653 dtv[0].counter = maxgen;
654
655 return result;
656 }
657 rtld_hidden_def (_dl_allocate_tls_init)
658
659 void *
660 _dl_allocate_tls (void *mem)
661 {
662 return _dl_allocate_tls_init (mem == NULL
663 ? _dl_allocate_tls_storage ()
664 : allocate_dtv (mem), true);
665 }
666 rtld_hidden_def (_dl_allocate_tls)
667
668
669 void
670 _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
671 {
672 dtv_t *dtv = GET_DTV (tcb);
673
674 /* We need to free the memory allocated for non-static TLS. */
675 for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
676 free (dtv[1 + cnt].pointer.to_free);
677
678 /* The array starts with dtv[-1]. */
679 if (dtv != GL(dl_initial_dtv))
680 free (dtv - 1);
681
682 if (dealloc_tcb)
683 free (*tcb_to_pointer_to_free_location (tcb));
684 }
685 rtld_hidden_def (_dl_deallocate_tls)
686
687
688 #ifdef SHARED
689 /* The __tls_get_addr function has two basic forms which differ in the
690 arguments. The IA-64 form takes two parameters, the module ID and
691 offset. The form used, among others, on IA-32 takes a reference to
692    a special structure which contains the same information.  The second
693    form seems to be more often used (at the moment) so we default to
694 it. Users of the IA-64 form have to provide adequate definitions
695 of the following macros. */
696 # ifndef GET_ADDR_ARGS
697 # define GET_ADDR_ARGS tls_index *ti
698 # define GET_ADDR_PARAM ti
699 # endif
700 # ifndef GET_ADDR_MODULE
701 # define GET_ADDR_MODULE ti->ti_module
702 # endif
703 # ifndef GET_ADDR_OFFSET
704 # define GET_ADDR_OFFSET ti->ti_offset
705 # endif
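/* With the default (structure) form, TI points to an object roughly of
   the shape sketched below; the authoritative definition lives in each
   port's dl-tls.h and the exact member types vary by architecture:

     typedef struct
     {
       unsigned long int ti_module;
       unsigned long int ti_offset;
     } tls_index;  */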
706
707 /* Allocate one DTV entry. */
708 static struct dtv_pointer
709 allocate_dtv_entry (size_t alignment, size_t size)
710 {
711 if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
712 {
713 /* The alignment is supported by malloc. */
714 _dl_tls_allocate_begin ();
715 void *ptr = malloc (size);
716 _dl_tls_allocate_end ();
717 return (struct dtv_pointer) { ptr, ptr };
718 }
719
720   /* Emulate memalign by manually aligning a pointer returned by
721 malloc. First compute the size with an overflow check. */
722 size_t alloc_size = size + alignment;
723 if (alloc_size < size)
724 return (struct dtv_pointer) {};
725
726 /* Perform the allocation. This is the pointer we need to free
727 later. */
728 _dl_tls_allocate_begin ();
729 void *start = malloc (alloc_size);
730 _dl_tls_allocate_end ();
731
732 if (start == NULL)
733 return (struct dtv_pointer) {};
734
735 /* Find the aligned position within the larger allocation. */
736 void *aligned = (void *) roundup ((uintptr_t) start, alignment);
737
738 return (struct dtv_pointer) { .val = aligned, .to_free = start };
739 }
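/* For instance, on the fallback path above a request for 100 bytes with
   64-byte alignment allocates 164 bytes; if malloc were to return
   0x1010, .val would be rounded up to 0x1040 while .to_free keeps
   0x1010 so the whole block can later be passed to free.  (Hypothetical
   addresses, for illustration only.)  */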
740
741 static struct dtv_pointer
742 allocate_and_init (struct link_map *map)
743 {
744 struct dtv_pointer result = allocate_dtv_entry
745 (map->l_tls_align, map->l_tls_blocksize);
746 if (result.val == NULL)
747 oom ();
748
749 /* Initialize the memory. */
750 memset (__mempcpy (result.val, map->l_tls_initimage,
751 map->l_tls_initimage_size),
752 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
753
754 return result;
755 }
756
757
758 struct link_map *
759 _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
760 {
761 struct link_map *the_map = NULL;
762 dtv_t *dtv = THREAD_DTV ();
763
764 /* CONCURRENCY NOTES:
765
766 The global dl_tls_dtv_slotinfo_list array contains for each module
767 index the generation counter current when that entry was updated.
768 This array never shrinks so that all module indices which were
769 valid at some time can be used to access it. Concurrent loading
770 and unloading of modules can update slotinfo entries or extend
771 the array. The updates happen under the GL(dl_load_tls_lock) and
772 finish with the release store of the generation counter to
773 GL(dl_tls_generation) which is synchronized with the load of
774 new_gen in the caller. So updates up to new_gen are synchronized
775 but updates for later generations may not be.
776
777 Here we update the thread dtv from old_gen (== dtv[0].counter) to
778 new_gen generation. For this, each dtv[i] entry is either set to
779 an unallocated state (set), or left unmodified (nop). Where (set)
780 may resize the dtv first if modid i >= dtv[-1].counter. The rules
781 for the decision between (set) and (nop) are
782
783 (1) If slotinfo entry i is concurrently updated then either (set)
784 or (nop) is valid: TLS access cannot use dtv[i] unless it is
785 synchronized with a generation > new_gen.
786
787 Otherwise, if the generation of slotinfo entry i is gen and the
788 loaded module for this entry is map then
789
790 (2) If gen <= old_gen then do (nop).
791
792 (3) If old_gen < gen <= new_gen then
793 (3.1) if map != 0 then (set)
794 (3.2) if map == 0 then either (set) or (nop).
795
796 Note that (1) cannot be reliably detected, but since both actions
797 are valid it does not have to be. Only (2) and (3.1) cases need
798 to be distinguished for which relaxed mo access of gen and map is
799 enough: their value is synchronized when it matters.
800
801 Note that a relaxed mo load may give an out-of-thin-air value since
802 it is used in decisions that can affect concurrent stores. But this
803 should only happen if the OOTA value causes UB that justifies the
804 concurrent store of the value. This is not expected to be an issue
805 in practice. */
806 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
807
808 if (dtv[0].counter < new_gen)
809 {
810 size_t total = 0;
811 size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
812 assert (max_modid >= req_modid);
813
814 /* We have to look through the entire dtv slotinfo list. */
815 listp = GL(dl_tls_dtv_slotinfo_list);
816 do
817 {
818 for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
819 {
820 size_t modid = total + cnt;
821
822 /* Case (1) for all later modids. */
823 if (modid > max_modid)
824 break;
825
826 size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);
827
828 /* Case (1). */
829 if (gen > new_gen)
830 continue;
831
832 /* Case (2) or (1). */
833 if (gen <= dtv[0].counter)
834 continue;
835
836 /* Case (3) or (1). */
837
838 /* If there is no map this means the entry is empty. */
839 struct link_map *map
840 = atomic_load_relaxed (&listp->slotinfo[cnt].map);
841 /* Check whether the current dtv array is large enough. */
842 if (dtv[-1].counter < modid)
843 {
844 /* Case (3.2) or (1). */
845 if (map == NULL)
846 continue;
847
848 /* Resizing the dtv aborts on failure: bug 16134. */
849 dtv = _dl_resize_dtv (dtv, max_modid);
850
851 assert (modid <= dtv[-1].counter);
852
853 /* Install this new dtv in the thread data
854 structures. */
855 INSTALL_NEW_DTV (dtv);
856 }
857
858                  /* If there is currently memory allocated for this
859                     dtv entry, free it.  Note: this is not AS-safe.  */
860 /* XXX Ideally we will at some point create a memory
861 pool. */
862 /* Avoid calling free on a null pointer. Some mallocs
863 incorrectly use dynamic TLS, and depending on how the
864 free function was compiled, it could call
865 __tls_get_addr before the null pointer check in the
866 free implementation. Checking here papers over at
867 least some dynamic TLS usage by interposed mallocs. */
868 if (dtv[modid].pointer.to_free != NULL)
869 {
870 _dl_tls_allocate_begin ();
871 free (dtv[modid].pointer.to_free);
872 _dl_tls_allocate_end ();
873 }
874 dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
875 dtv[modid].pointer.to_free = NULL;
876
877 if (modid == req_modid)
878 the_map = map;
879 }
880
881 total += listp->len;
882 if (total > max_modid)
883 break;
884
885 /* Synchronize with _dl_add_to_slotinfo. Ideally this would
886 be consume MO since we only need to order the accesses to
887 the next node after the read of the address and on most
888 hardware (other than alpha) a normal load would do that
889 because of the address dependency. */
890 listp = atomic_load_acquire (&listp->next);
891 }
892 while (listp != NULL);
893
894 /* This will be the new maximum generation counter. */
895 dtv[0].counter = new_gen;
896 }
897
898 return the_map;
899 }
900
901
902 static void *
903 __attribute_noinline__
904 tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
905 {
906 /* The allocation was deferred. Do it now. */
907 if (the_map == NULL)
908 {
909 /* Find the link map for this module. */
910 size_t idx = GET_ADDR_MODULE;
911 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
912
913 while (idx >= listp->len)
914 {
915 idx -= listp->len;
916 listp = listp->next;
917 }
918
919 the_map = listp->slotinfo[idx].map;
920 }
921
922 /* Make sure that, if a dlopen running in parallel forces the
923 variable into static storage, we'll wait until the address in the
924 static TLS block is set up, and use that. If we're undecided
925 yet, make sure we make the decision holding the lock as well. */
926 if (__glibc_unlikely (the_map->l_tls_offset
927 != FORCED_DYNAMIC_TLS_OFFSET))
928 {
929 __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
930 if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
931 {
932 the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
933 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
934 }
935 else if (__glibc_likely (the_map->l_tls_offset
936 != FORCED_DYNAMIC_TLS_OFFSET))
937 {
938 #if TLS_TCB_AT_TP
939 void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
940 #elif TLS_DTV_AT_TP
941 void *p = (char *) THREAD_SELF + the_map->l_tls_offset + TLS_PRE_TCB_SIZE;
942 #else
943 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
944 #endif
945 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
946
947 dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
948 dtv[GET_ADDR_MODULE].pointer.val = p;
949
950 return (char *) p + GET_ADDR_OFFSET;
951 }
952 else
953 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
954 }
955 struct dtv_pointer result = allocate_and_init (the_map);
956 dtv[GET_ADDR_MODULE].pointer = result;
957 assert (result.to_free != NULL);
958
959 return (char *) result.val + GET_ADDR_OFFSET;
960 }
961
962
963 static struct link_map *
964 __attribute_noinline__
965 update_get_addr (GET_ADDR_ARGS, size_t gen)
966 {
967 struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
968 dtv_t *dtv = THREAD_DTV ();
969
970 void *p = dtv[GET_ADDR_MODULE].pointer.val;
971
972 if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
973 return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);
974
975 return (void *) p + GET_ADDR_OFFSET;
976 }
977
978 /* For all machines that have a non-macro version of __tls_get_addr, we
979 want to use rtld_hidden_proto/rtld_hidden_def in order to call the
980 internal alias for __tls_get_addr from ld.so. This avoids a PLT entry
981 in ld.so for __tls_get_addr. */
982
983 #ifndef __tls_get_addr
984 extern void * __tls_get_addr (GET_ADDR_ARGS);
985 rtld_hidden_proto (__tls_get_addr)
986 rtld_hidden_def (__tls_get_addr)
987 #endif
988
989 /* The generic dynamic and local dynamic model cannot be used in
990 statically linked applications. */
991 void *
992 __tls_get_addr (GET_ADDR_ARGS)
993 {
994 dtv_t *dtv = THREAD_DTV ();
995
996 /* Update is needed if dtv[0].counter < the generation of the accessed
997 module, but the global generation counter is easier to check (which
998 must be synchronized up to the generation of the accessed module by
999 user code doing the TLS access so relaxed mo read is enough). */
1000 size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
1001 if (__glibc_unlikely (dtv[0].counter != gen))
1002 {
1003 if (_dl_tls_allocate_active ()
1004 && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)
1005 /* This is a reentrant __tls_get_addr call, but we can
1006 satisfy it because it's an initially-loaded module ID.
1007 These TLS slotinfo slots do not change, so the
1008 out-of-date generation counter does not matter. However,
1009              if not in a TLS update, still call update_get_addr below, to
1010 get off the slow path eventually. */
1011 ;
1012 else
1013 {
1014 /* Update DTV up to the global generation, see CONCURRENCY NOTES
1015 in _dl_update_slotinfo. */
1016 gen = atomic_load_acquire (&GL(dl_tls_generation));
1017 return update_get_addr (GET_ADDR_PARAM, gen);
1018 }
1019 }
1020
1021 void *p = dtv[GET_ADDR_MODULE].pointer.val;
1022
1023 if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
1024 return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
1025
1026 return (char *) p + GET_ADDR_OFFSET;
1027 }
1028 #endif /* SHARED */
1029
1030
1031 /* Look up the module's TLS block as for __tls_get_addr,
1032 but never touch anything. Return null if it's not allocated yet. */
1033 void *
1034 _dl_tls_get_addr_soft (struct link_map *l)
1035 {
1036 if (__glibc_unlikely (l->l_tls_modid == 0))
1037 /* This module has no TLS segment. */
1038 return NULL;
1039
1040 dtv_t *dtv = THREAD_DTV ();
1041 /* This may be called without holding the GL(dl_load_tls_lock). Reading
1042 arbitrary gen value is fine since this is best effort code. */
1043 size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
1044 if (__glibc_unlikely (dtv[0].counter != gen))
1045 {
1046 /* This thread's DTV is not completely current,
1047 but it might already cover this module. */
1048
1049 if (l->l_tls_modid >= dtv[-1].counter)
1050 /* Nope. */
1051 return NULL;
1052
1053 size_t idx = l->l_tls_modid;
1054 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
1055 while (idx >= listp->len)
1056 {
1057 idx -= listp->len;
1058 listp = listp->next;
1059 }
1060
1061 /* We've reached the slot for this module.
1062 If its generation counter is higher than the DTV's,
1063 this thread does not know about this module yet. */
1064 if (dtv[0].counter < listp->slotinfo[idx].gen)
1065 return NULL;
1066 }
1067
1068 void *data = dtv[l->l_tls_modid].pointer.val;
1069 if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
1070 /* The DTV is current, but this thread has not yet needed
1071 to allocate this module's segment. */
1072 data = NULL;
1073
1074 return data;
1075 }
1076
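/* Number of initially loaded TLS modules, i.e. the length of the leading
   run of slotinfo entries whose map can never change or be reused.
   Accesses to module IDs below this limit can be served even from a
   reentrant __tls_get_addr call (see _dl_tls_allocate_active above),
   because those slotinfo entries are stable.  */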
1077 size_t _dl_tls_initial_modid_limit;
1078
1079 void
1080 _dl_tls_initial_modid_limit_setup (void)
1081 {
1082 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
1083 size_t idx;
1084 for (idx = 0; idx < listp->len; ++idx)
1085 {
1086 struct link_map *l = listp->slotinfo[idx].map;
1087 if (l == NULL
1088 /* The object can be unloaded, so its modid can be
1089 reassociated. */
1090 || !(l->l_type == lt_executable || l->l_type == lt_library))
1091 break;
1092 }
1093 _dl_tls_initial_modid_limit = idx;
1094 }
1095
1096
1097 void
1098 _dl_add_to_slotinfo (struct link_map *l, bool do_add)
1099 {
1100   /* Now that we know the object is loaded successfully, add
1101 modules containing TLS data to the dtv info table. We
1102 might have to increase its size. */
1103 struct dtv_slotinfo_list *listp;
1104 struct dtv_slotinfo_list *prevp;
1105 size_t idx = l->l_tls_modid;
1106
1107 /* Find the place in the dtv slotinfo list. */
1108 listp = GL(dl_tls_dtv_slotinfo_list);
1109 prevp = NULL; /* Needed to shut up gcc. */
1110 do
1111 {
1112 /* Does it fit in the array of this list element? */
1113 if (idx < listp->len)
1114 break;
1115 idx -= listp->len;
1116 prevp = listp;
1117 listp = listp->next;
1118 }
1119 while (listp != NULL);
1120
1121 if (listp == NULL)
1122 {
1123 /* When we come here it means we have to add a new element
1124 to the slotinfo list. And the new module must be in
1125 the first slot. */
1126 assert (idx == 0);
1127
1128 _dl_tls_allocate_begin ();
1129 listp = (struct dtv_slotinfo_list *)
1130 malloc (sizeof (struct dtv_slotinfo_list)
1131 + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1132 _dl_tls_allocate_end ();
1133 if (listp == NULL)
1134 {
1135 /* We ran out of memory while resizing the dtv slotinfo list. */
1136 _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
1137 cannot create TLS data structures"));
1138 }
1139
1140 listp->len = TLS_SLOTINFO_SURPLUS;
1141 listp->next = NULL;
1142 memset (listp->slotinfo, '\0',
1143 TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1144 /* Synchronize with _dl_update_slotinfo. */
1145 atomic_store_release (&prevp->next, listp);
1146 }
1147
1148 /* Add the information into the slotinfo data structure. */
1149 if (do_add)
1150 {
1151 /* Can be read concurrently. See _dl_update_slotinfo. */
1152 atomic_store_relaxed (&listp->slotinfo[idx].map, l);
1153 atomic_store_relaxed (&listp->slotinfo[idx].gen,
1154 GL(dl_tls_generation) + 1);
1155 }
1156 }
1157
1158 #if PTHREAD_IN_LIBC
1159 static inline void __attribute__((always_inline))
1160 init_one_static_tls (struct pthread *curp, struct link_map *map)
1161 {
1162 # if TLS_TCB_AT_TP
1163 void *dest = (char *) curp - map->l_tls_offset;
1164 # elif TLS_DTV_AT_TP
1165 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1166 # else
1167 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1168 # endif
1169
1170 /* Initialize the memory. */
1171 memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1172 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1173 }
1174
1175 void
1176 _dl_init_static_tls (struct link_map *map)
1177 {
1178 lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1179
1180 /* Iterate over the list with system-allocated threads first. */
1181 list_t *runp;
1182 list_for_each (runp, &GL (dl_stack_used))
1183 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1184
1185 /* Now the list with threads using user-allocated stacks. */
1186 list_for_each (runp, &GL (dl_stack_user))
1187 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1188
1189 lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1190 }
1191 #endif /* PTHREAD_IN_LIBC */