1 /* Thread-local storage handling in the ELF dynamic linker. Generic version.
2 Copyright (C) 2002-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <libintl.h>
22 #include <signal.h>
23 #include <stdlib.h>
24 #include <unistd.h>
25 #include <sys/param.h>
26 #include <atomic.h>
27
28 #include <tls.h>
29 #include <dl-tls.h>
30 #include <ldsodefs.h>
31
32 #if PTHREAD_IN_LIBC
33 # include <list.h>
34 #endif
35
36 #define TUNABLE_NAMESPACE rtld
37 #include <dl-tunables.h>
38
39 /* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
40
41 - IE TLS in libc.so for all dlmopen namespaces except in the initial
42 one where libc.so is not loaded dynamically but at startup time,
43 - IE TLS in other libraries which may be dynamically loaded even in the
44 initial namespace,
45 - and optionally for optimizing dynamic TLS access.
46
47 The maximum number of namespaces is DL_NNS, but to support that many
48 namespaces correctly the static TLS allocation should be significantly
49 increased, which may cause problems with small thread stacks due to the
50 way static TLS is accounted (bug 11787).
51
52 So there is a rtld.nns tunable limit on the number of supported namespaces
53 that affects the size of the static TLS and by default it's small enough
54 not to cause problems with existing applications. The limit is not
55 enforced or checked: it is the user's responsibility to increase rtld.nns
56 if more dlmopen namespaces are used.
57
58 Audit modules use their own namespaces; they are not included in rtld.nns,
59 but come on top when computing the number of namespaces. */
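/* For example, an application that creates extra dlmopen namespaces can
   raise both limits at startup (when tunables are enabled) with something
   like

     GLIBC_TUNABLES=glibc.rtld.nns=8:glibc.rtld.optional_static_tls=1024

   where the values 8 and 1024 are purely illustrative.  */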
60
61 /* Size of initial-exec TLS in libc.so. This should be the maximum of
62 observed PT_TLS sizes across all architectures. Some
63 architectures have lower values due to differences in type sizes
64 and link editor capabilities. */
65 #define LIBC_IE_TLS 144
66
67 /* Size of initial-exec TLS in libraries other than libc.so.
68 This should be large enough to cover runtime libraries of the
69 compiler such as libgomp and libraries in libc other than libc.so. */
70 #define OTHER_IE_TLS 144
71
72 /* Default number of namespaces. */
73 #define DEFAULT_NNS 4
74
75 /* Default for dl_tls_static_optional. */
76 #define OPTIONAL_TLS 512
77
78 /* Compute the static TLS surplus based on the namespace count and the
79 TLS space that can be used for optimizations. */
80 static inline int
81 tls_static_surplus (int nns, int opt_tls)
82 {
83 return (nns - 1) * LIBC_IE_TLS + nns * OTHER_IE_TLS + opt_tls;
84 }
85
86 /* This value is chosen so that with default values for the tunables,
87 the computation of dl_tls_static_surplus in
88 _dl_tls_static_surplus_init yields the historic value 1664, for
89 backwards compatibility. */
90 #define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
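/* With the default values above and no audit modules, this works out to
   tls_static_surplus (4, 512) == (4 - 1) * 144 + 4 * 144 + 512 == 1520,
   hence LEGACY_TLS == 1664 - 1520 == 144, and the surplus computed in
   _dl_tls_static_surplus_init below is 1520 + 144 == 1664, the historic
   value.  */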
91
92 /* Calculate the size of the static TLS surplus, when the given
93 number of audit modules are loaded. Must be called after the
94 number of audit modules is known and before static TLS allocation. */
95 void
96 _dl_tls_static_surplus_init (size_t naudit)
97 {
98 size_t nns, opt_tls;
99
100 #if HAVE_TUNABLES
101 nns = TUNABLE_GET (nns, size_t, NULL);
102 opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
103 #else
104 /* Default values of the tunables. */
105 nns = DEFAULT_NNS;
106 opt_tls = OPTIONAL_TLS;
107 #endif
108 if (nns > DL_NNS)
109 nns = DL_NNS;
110 if (DL_NNS - nns < naudit)
111 _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
112 (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
113 nns += naudit;
114
115 GL(dl_tls_static_optional) = opt_tls;
116 assert (LEGACY_TLS >= 0);
117 GLRO(dl_tls_static_surplus) = tls_static_surplus (nns, opt_tls) + LEGACY_TLS;
118 }
119
120 /* Out-of-memory handler. */
121 static void
122 __attribute__ ((__noreturn__))
123 oom (void)
124 {
125 _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
126 }
127
128
129 void
130 _dl_assign_tls_modid (struct link_map *l)
131 {
132 size_t result;
133
134 if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
135 {
136 size_t disp = 0;
137 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
138
139 /* Note that this branch will never be executed during program
140 start since there are no gaps at that time. Therefore it
141 does not matter that the dl_tls_dtv_slotinfo_list is not allocated
142 yet when the function is called for the first time.
143
144 NB: the offset +1 is due to the fact that DTV[0] is used
145 for something else. */
146 result = GL(dl_tls_static_nelem) + 1;
147 if (result <= GL(dl_tls_max_dtv_idx))
148 do
149 {
150 while (result - disp < runp->len)
151 {
152 if (runp->slotinfo[result - disp].map == NULL)
153 break;
154
155 ++result;
156 assert (result <= GL(dl_tls_max_dtv_idx) + 1);
157 }
158
159 if (result - disp < runp->len)
160 {
161 /* Mark the entry as used, so any dependency sees it. */
162 atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
163 break;
164 }
165
166 disp += runp->len;
167 }
168 while ((runp = runp->next) != NULL);
169
170 if (result > GL(dl_tls_max_dtv_idx))
171 {
172 /* The new index must indeed be exactly one higher than the
173 previous high. */
174 assert (result == GL(dl_tls_max_dtv_idx) + 1);
175 /* There is no gap anymore. */
176 GL(dl_tls_dtv_gaps) = false;
177
178 goto nogaps;
179 }
180 }
181 else
182 {
183 /* No gaps, allocate a new entry. */
184 nogaps:
185
186 result = GL(dl_tls_max_dtv_idx) + 1;
187 /* Can be read concurrently. */
188 atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
189 }
190
191 l->l_tls_modid = result;
192 }
193
194
195 size_t
196 _dl_count_modids (void)
197 {
198 /* The count is the max unless dlclose or failed dlopen created gaps. */
199 if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
200 return GL(dl_tls_max_dtv_idx);
201
202 /* We have gaps and are forced to count the non-NULL entries. */
203 size_t n = 0;
204 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
205 while (runp != NULL)
206 {
207 for (size_t i = 0; i < runp->len; ++i)
208 if (runp->slotinfo[i].map != NULL)
209 ++n;
210
211 runp = runp->next;
212 }
213
214 return n;
215 }
216
217
218 #ifdef SHARED
219 void
220 _dl_determine_tlsoffset (void)
221 {
222 size_t max_align = TCB_ALIGNMENT;
223 size_t freetop = 0;
224 size_t freebottom = 0;
225
226 /* The first element of the dtv slot info list is allocated. */
227 assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
228 /* There is at this point only one element in the
229 dl_tls_dtv_slotinfo_list list. */
230 assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
231
232 struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
233
234 /* Determining the offset of the various parts of the static TLS
235 block has several dependencies. In addition we have to work
236 around bugs in some toolchains.
237
238 Each TLS block from the objects available at link time has a size
239 and an alignment requirement. The GNU ld computes the alignment
240 requirements for the data at the positions *in the file*, though.
241 I.e., it is not simply possible to allocate a block with the size
242 of the TLS program header entry. The data is laid out assuming
243 that the first byte of the TLS block fulfills
244
245 p_vaddr mod p_align == &TLS_BLOCK mod p_align
246
247 This means we have to add artificial padding at the beginning of
248 the TLS block. These bytes are never used for the TLS data in
249 this module but the first byte allocated must be aligned
250 according to mod p_align == 0 so that the first byte of the TLS
251 block is aligned according to p_vaddr mod p_align. This is ugly
252 and the linker can help by computing the offsets in the TLS block
253 assuming the first byte of the TLS block is aligned according to
254 p_align.
255
256 The extra space which might be allocated before the first byte of
257 the TLS block need not go unused. The code below tries to use
258 that memory for the next TLS block. This can work if the total
259 memory requirement for the next TLS block is smaller than the
260 gap. */
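/* A small worked example with hypothetical values: for a module with
   p_align == 16 and p_vaddr % 16 == 4, the loader records
   l_tls_firstbyte_offset == 4, so the code below computes
   firstbyte == (-4) & 15 == 12.  The chosen offset then satisfies
   off % 16 == 12 (TLS_TCB_AT_TP) or l_tls_offset % 16 == 4
   (TLS_DTV_AT_TP), so once the thread pointer is aligned to
   l_tls_align the first byte of the block ends up at an address
   congruent to 4 mod 16, i.e. the congruence stated above holds.  */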
261
262 #if TLS_TCB_AT_TP
263 /* We simply start with zero. */
264 size_t offset = 0;
265
266 for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
267 {
268 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
269
270 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
271 & (slotinfo[cnt].map->l_tls_align - 1));
272 size_t off;
273 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
274
275 if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
276 {
277 off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
278 - firstbyte, slotinfo[cnt].map->l_tls_align)
279 + firstbyte;
280 if (off <= freebottom)
281 {
282 freetop = off;
283
284 /* XXX For some architectures we perhaps should store the
285 negative offset. */
286 slotinfo[cnt].map->l_tls_offset = off;
287 continue;
288 }
289 }
290
291 off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
292 slotinfo[cnt].map->l_tls_align) + firstbyte;
293 if (off > offset + slotinfo[cnt].map->l_tls_blocksize
294 + (freebottom - freetop))
295 {
296 freetop = offset;
297 freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
298 }
299 offset = off;
300
301 /* XXX For some architectures we perhaps should store the
302 negative offset. */
303 slotinfo[cnt].map->l_tls_offset = off;
304 }
305
306 GL(dl_tls_static_used) = offset;
307 GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
308 max_align)
309 + TLS_TCB_SIZE);
310 #elif TLS_DTV_AT_TP
311 /* The TLS blocks start right after the TCB. */
312 size_t offset = TLS_TCB_SIZE;
313
314 for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
315 {
316 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
317
318 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
319 & (slotinfo[cnt].map->l_tls_align - 1));
320 size_t off;
321 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
322
323 if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
324 {
325 off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
326 if (off - freebottom < firstbyte)
327 off += slotinfo[cnt].map->l_tls_align;
328 if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
329 {
330 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
331 freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
332 - firstbyte);
333 continue;
334 }
335 }
336
337 off = roundup (offset, slotinfo[cnt].map->l_tls_align);
338 if (off - offset < firstbyte)
339 off += slotinfo[cnt].map->l_tls_align;
340
341 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
342 if (off - firstbyte - offset > freetop - freebottom)
343 {
344 freebottom = offset;
345 freetop = off - firstbyte;
346 }
347
348 offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
349 }
350
351 GL(dl_tls_static_used) = offset;
352 GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
353 TCB_ALIGNMENT);
354 #else
355 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
356 #endif
357
358 /* The alignment requirement for the static TLS block. */
359 GLRO (dl_tls_static_align) = max_align;
360 }
361 #endif /* SHARED */
362
363 static void *
364 allocate_dtv (void *result)
365 {
366 dtv_t *dtv;
367 size_t dtv_length;
368
369 /* Relaxed MO, because the dtv size is later rechecked, not relied on. */
370 size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
371 /* We allocate a few more elements in the dtv than are needed for the
372 initial set of modules. This should avoid expansions of the dtv in
373 most cases. */
374 dtv_length = max_modid + DTV_SURPLUS;
375 dtv = calloc (dtv_length + 2, sizeof (dtv_t));
376 if (dtv != NULL)
377 {
378 /* This is the initial length of the dtv. */
379 dtv[0].counter = dtv_length;
380
381 /* The rest of the dtv (including the generation counter) is
382 initialized to zero to indicate that nothing is there. */
383
384 /* Add the dtv to the thread data structures. */
385 INSTALL_DTV (result, dtv);
386 }
387 else
388 result = NULL;
389
390 return result;
391 }
392
393 /* Get size and alignment requirements of the static TLS block. This
394 function is no longer used by glibc itself, but the GCC sanitizers
395 use it despite the GLIBC_PRIVATE status. */
396 void
397 _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
398 {
399 *sizep = GLRO (dl_tls_static_size);
400 *alignp = GLRO (dl_tls_static_align);
401 }
402
403 /* Derive the location of the pointer to the start of the original
404 allocation (before alignment) from the pointer to the TCB. */
405 static inline void **
406 tcb_to_pointer_to_free_location (void *tcb)
407 {
408 #if TLS_TCB_AT_TP
409 /* The TCB follows the TLS blocks, and the pointer to the front
410 follows the TCB. */
411 void **original_pointer_location = tcb + TLS_TCB_SIZE;
412 #elif TLS_DTV_AT_TP
413 /* The TCB comes first, preceded by the pre-TCB, and the pointer is
414 before that. */
415 void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
416 #endif
417 return original_pointer_location;
418 }
419
420 void *
421 _dl_allocate_tls_storage (void)
422 {
423 void *result;
424 size_t size = GLRO (dl_tls_static_size);
425
426 #if TLS_DTV_AT_TP
427 /* Memory layout is:
428 [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
429 ^ This should be returned. */
430 size += TLS_PRE_TCB_SIZE;
431 #endif
432
433 /* Perform the allocation. Reserve space for the required alignment
434 and the pointer to the original allocation. */
435 size_t alignment = GLRO (dl_tls_static_align);
436 void *allocated = malloc (size + alignment + sizeof (void *));
437 if (__glibc_unlikely (allocated == NULL))
438 return NULL;
439
440 /* Perform alignment and allocate the DTV. */
441 #if TLS_TCB_AT_TP
442 /* The TCB follows the TLS blocks, which determine the alignment.
443 (TCB alignment requirements have been taken into account when
444 calculating GLRO (dl_tls_static_align).) */
445 void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
446 result = aligned + size - TLS_TCB_SIZE;
447
448 /* Clear the TCB data structure. We can't ask the caller (i.e.
449 libpthread) to do it, because we will initialize the DTV et al. */
450 memset (result, '\0', TLS_TCB_SIZE);
451 #elif TLS_DTV_AT_TP
452 /* Pre-TCB and TCB come before the TLS blocks. The layout computed
453 in _dl_determine_tlsoffset assumes that the TCB is aligned to the
454 TLS block alignment, and not just the TLS blocks after it. This
455 can leave an unused alignment gap between the TCB and the TLS
456 blocks. */
457 result = (void *) roundup
458 (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
459 alignment);
460
461 /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
462 it. We can't ask the caller (i.e. libpthread) to do it, because
463 we will initialize the DTV et al. */
464 memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
465 #endif
466
467 /* Record the value of the original pointer for later
468 deallocation. */
469 *tcb_to_pointer_to_free_location (result) = allocated;
470
471 result = allocate_dtv (result);
472 if (result == NULL)
473 free (allocated);
474 return result;
475 }
476
477
478 #ifndef SHARED
479 extern dtv_t _dl_static_dtv[];
480 # define _dl_initial_dtv (&_dl_static_dtv[1])
481 #endif
482
483 static dtv_t *
484 _dl_resize_dtv (dtv_t *dtv, size_t max_modid)
485 {
486 /* Resize the dtv. */
487 dtv_t *newp;
488 size_t newsize = max_modid + DTV_SURPLUS;
489 size_t oldsize = dtv[-1].counter;
490
491 if (dtv == GL(dl_initial_dtv))
492 {
493 /* This is the initial dtv that was either statically allocated in
494 __libc_setup_tls or allocated during rtld startup using the
495 dl-minimal.c malloc instead of the real malloc. We can't free
496 it, we have to abandon the old storage. */
497
498 newp = malloc ((2 + newsize) * sizeof (dtv_t));
499 if (newp == NULL)
500 oom ();
501 memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
502 }
503 else
504 {
505 newp = realloc (&dtv[-1],
506 (2 + newsize) * sizeof (dtv_t));
507 if (newp == NULL)
508 oom ();
509 }
510
511 newp[0].counter = newsize;
512
513 /* Clear the newly allocated part. */
514 memset (newp + 2 + oldsize, '\0',
515 (newsize - oldsize) * sizeof (dtv_t));
516
517 /* Return the new dtv; its element 0 holds the generation counter. */
518 return &newp[1];
519 }
520
521
522 void *
523 _dl_allocate_tls_init (void *result)
524 {
525 if (result == NULL)
526 /* The memory allocation failed. */
527 return NULL;
528
529 dtv_t *dtv = GET_DTV (result);
530 struct dtv_slotinfo_list *listp;
531 size_t total = 0;
532 size_t maxgen = 0;
533
534 /* Protects global dynamic TLS related state. */
535 __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
536
537 /* Check if the current dtv is big enough. */
538 if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
539 {
540 /* Resize the dtv. */
541 dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));
542
543 /* Install this new dtv in the thread data structures. */
544 INSTALL_DTV (result, &dtv[-1]);
545 }
546
547 /* We have to prepare the dtv for all currently loaded modules using
548 TLS. For those which are dynamically loaded we add the values
549 indicating deferred allocation. */
550 listp = GL(dl_tls_dtv_slotinfo_list);
551 while (1)
552 {
553 size_t cnt;
554
555 for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
556 {
557 struct link_map *map;
558 void *dest;
559
560 /* Check for the total number of used slots. */
561 if (total + cnt > GL(dl_tls_max_dtv_idx))
562 break;
563
564 map = listp->slotinfo[cnt].map;
565 if (map == NULL)
566 /* Unused entry. */
567 continue;
568
569 /* Keep track of the maximum generation number. This might
570 not be the generation counter. */
571 assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
572 maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
573
574 dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
575 dtv[map->l_tls_modid].pointer.to_free = NULL;
576
577 if (map->l_tls_offset == NO_TLS_OFFSET
578 || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
579 continue;
580
581 assert (map->l_tls_modid == total + cnt);
582 assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
583 #if TLS_TCB_AT_TP
584 assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
585 dest = (char *) result - map->l_tls_offset;
586 #elif TLS_DTV_AT_TP
587 dest = (char *) result + map->l_tls_offset;
588 #else
589 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
590 #endif
591
592 /* Set up the DTV entry. The simplified __tls_get_addr that
593 some platforms use in static programs requires it. */
594 dtv[map->l_tls_modid].pointer.val = dest;
595
596 /* Copy the initialization image and clear the BSS part. */
597 memset (__mempcpy (dest, map->l_tls_initimage,
598 map->l_tls_initimage_size), '\0',
599 map->l_tls_blocksize - map->l_tls_initimage_size);
600 }
601
602 total += cnt;
603 if (total > GL(dl_tls_max_dtv_idx))
604 break;
605
606 listp = listp->next;
607 assert (listp != NULL);
608 }
609 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
610
611 /* The DTV version is up-to-date now. */
612 dtv[0].counter = maxgen;
613
614 return result;
615 }
616 rtld_hidden_def (_dl_allocate_tls_init)
617
618 void *
619 _dl_allocate_tls (void *mem)
620 {
621 return _dl_allocate_tls_init (mem == NULL
622 ? _dl_allocate_tls_storage ()
623 : allocate_dtv (mem));
624 }
625 rtld_hidden_def (_dl_allocate_tls)
626
627
628 void
629 _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
630 {
631 dtv_t *dtv = GET_DTV (tcb);
632
633 /* We need to free the memory allocated for non-static TLS. */
634 for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
635 free (dtv[1 + cnt].pointer.to_free);
636
637 /* The array starts with dtv[-1]. */
638 if (dtv != GL(dl_initial_dtv))
639 free (dtv - 1);
640
641 if (dealloc_tcb)
642 free (*tcb_to_pointer_to_free_location (tcb));
643 }
644 rtld_hidden_def (_dl_deallocate_tls)
645
646
647 #ifdef SHARED
648 /* The __tls_get_addr function has two basic forms which differ in the
649 arguments. The IA-64 form takes two parameters, the module ID and
650 offset. The form used, among others, on IA-32 takes a reference to
651 a special structure which contains the same information. The second
652 form seems to be more often used (at the moment) so we default to
653 it. Users of the IA-64 form have to provide adequate definitions
654 of the following macros. */
655 # ifndef GET_ADDR_ARGS
656 # define GET_ADDR_ARGS tls_index *ti
657 # define GET_ADDR_PARAM ti
658 # endif
659 # ifndef GET_ADDR_MODULE
660 # define GET_ADDR_MODULE ti->ti_module
661 # endif
662 # ifndef GET_ADDR_OFFSET
663 # define GET_ADDR_OFFSET ti->ti_offset
664 # endif
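/* For reference, the structure form is defined by the per-architecture
   dl-tls.h; on x86-64, for instance, it looks roughly like

     typedef struct dl_tls_index
     {
       uint64_t ti_module;
       uint64_t ti_offset;
     } tls_index;

   with the exact field types varying between architectures.  */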
665
666 /* Allocate one DTV entry. */
667 static struct dtv_pointer
668 allocate_dtv_entry (size_t alignment, size_t size)
669 {
670 if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
671 {
672 /* The alignment is supported by malloc. */
673 void *ptr = malloc (size);
674 return (struct dtv_pointer) { ptr, ptr };
675 }
676
677 /* Emulate memalign by manually aligning a pointer returned by
678 malloc. First compute the size with an overflow check. */
679 size_t alloc_size = size + alignment;
680 if (alloc_size < size)
681 return (struct dtv_pointer) {};
682
683 /* Perform the allocation. This is the pointer we need to free
684 later. */
685 void *start = malloc (alloc_size);
686 if (start == NULL)
687 return (struct dtv_pointer) {};
688
689 /* Find the aligned position within the larger allocation. */
690 void *aligned = (void *) roundup ((uintptr_t) start, alignment);
691
692 return (struct dtv_pointer) { .val = aligned, .to_free = start };
693 }
694
695 static struct dtv_pointer
696 allocate_and_init (struct link_map *map)
697 {
698 struct dtv_pointer result = allocate_dtv_entry
699 (map->l_tls_align, map->l_tls_blocksize);
700 if (result.val == NULL)
701 oom ();
702
703 /* Initialize the memory. */
704 memset (__mempcpy (result.val, map->l_tls_initimage,
705 map->l_tls_initimage_size),
706 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
707
708 return result;
709 }
710
711
712 struct link_map *
713 _dl_update_slotinfo (unsigned long int req_modid)
714 {
715 struct link_map *the_map = NULL;
716 dtv_t *dtv = THREAD_DTV ();
717
718 /* The global dl_tls_dtv_slotinfo array contains for each module
719 index the generation counter current when the entry was created.
720 This array never shrinks so that all module indices which were
721 valid at some time can be used to access it. Before the first
722 use of a new module index in this function the array was extended
723 appropriately. Access also does not have to be guarded against
724 modifications of the array. It is assumed that pointer-size
725 values can be read atomically even in SMP environments. It is
726 possible that other threads at the same time dynamically load
727 code and therefore add to the slotinfo list. This is a problem
728 since we must not pick up any information about incomplete work.
729 The solution to this is to ignore all dtv slots which were
730 created after the one we are currently interested in. We know that
731 dynamic loading for this module is completed and this is the last
732 load operation we know finished. */
733 unsigned long int idx = req_modid;
734 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
735
736 while (idx >= listp->len)
737 {
738 idx -= listp->len;
739 listp = listp->next;
740 }
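  /* Illustration with hypothetical sizes: if the first list node had
     len == 64 and req_modid were 70, the loop above would leave listp
     pointing at the second node with idx == 6, i.e. module 70 lives in
     slot 6 of that node.  */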
741
742 if (dtv[0].counter < listp->slotinfo[idx].gen)
743 {
744 /* CONCURRENCY NOTES:
745
746 Here the dtv needs to be updated to new_gen generation count.
747
748 This code may be called during TLS access when GL(dl_load_tls_lock)
749 is not held. In that case the user code has to synchronize with
750 dlopen and dlclose calls of relevant modules. A module m is
751 relevant if the generation of m <= new_gen and dlclose of m is
752 synchronized: a memory access here happens after the dlopen and
753 before the dlclose of relevant modules. The dtv entries for
754 relevant modules need to be updated, other entries can be
755 arbitrary.
756
757 This e.g. means that the first part of the slotinfo list can be
758 accessed race free, but the tail may be concurrently extended.
759 Similarly relevant slotinfo entries can be read race free, but
760 other entries are racy. However updating a non-relevant dtv
761 entry does not affect correctness. For a relevant module m,
762 max_modid >= modid of m. */
763 size_t new_gen = listp->slotinfo[idx].gen;
764 size_t total = 0;
765 size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
766 assert (max_modid >= req_modid);
767
768 /* We have to look through the entire dtv slotinfo list. */
769 listp = GL(dl_tls_dtv_slotinfo_list);
770 do
771 {
772 for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
773 {
774 size_t modid = total + cnt;
775
776 /* Later entries are not relevant. */
777 if (modid > max_modid)
778 break;
779
780 size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);
781
782 if (gen > new_gen)
783 /* Not relevant. */
784 continue;
785
786 /* If the entry is older than the current dtv layout we
787 know we don't have to handle it. */
788 if (gen <= dtv[0].counter)
789 continue;
790
791 /* If there is no map this means the entry is empty. */
792 struct link_map *map
793 = atomic_load_relaxed (&listp->slotinfo[cnt].map);
794 /* Check whether the current dtv array is large enough. */
795 if (dtv[-1].counter < modid)
796 {
797 if (map == NULL)
798 continue;
799
800 /* Resize the dtv. */
801 dtv = _dl_resize_dtv (dtv, max_modid);
802
803 assert (modid <= dtv[-1].counter);
804
805 /* Install this new dtv in the thread data
806 structures. */
807 INSTALL_NEW_DTV (dtv);
808 }
809
810 /* If there is currently memory allocated for this
811 dtv entry, free it. */
812 /* XXX Ideally we will at some point create a memory
813 pool. */
814 free (dtv[modid].pointer.to_free);
815 dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
816 dtv[modid].pointer.to_free = NULL;
817
818 if (modid == req_modid)
819 the_map = map;
820 }
821
822 total += listp->len;
823 if (total > max_modid)
824 break;
825
826 /* Synchronize with _dl_add_to_slotinfo. Ideally this would
827 be consume MO since we only need to order the accesses to
828 the next node after the read of the address and on most
829 hardware (other than alpha) a normal load would do that
830 because of the address dependency. */
831 listp = atomic_load_acquire (&listp->next);
832 }
833 while (listp != NULL);
834
835 /* This will be the new maximum generation counter. */
836 dtv[0].counter = new_gen;
837 }
838
839 return the_map;
840 }
841
842
843 static void *
844 __attribute_noinline__
845 tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
846 {
847 /* The allocation was deferred. Do it now. */
848 if (the_map == NULL)
849 {
850 /* Find the link map for this module. */
851 size_t idx = GET_ADDR_MODULE;
852 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
853
854 while (idx >= listp->len)
855 {
856 idx -= listp->len;
857 listp = listp->next;
858 }
859
860 the_map = listp->slotinfo[idx].map;
861 }
862
863 /* Make sure that, if a dlopen running in parallel forces the
864 variable into static storage, we'll wait until the address in the
865 static TLS block is set up, and use that. If we're undecided
866 yet, make sure we make the decision holding the lock as well. */
867 if (__glibc_unlikely (the_map->l_tls_offset
868 != FORCED_DYNAMIC_TLS_OFFSET))
869 {
870 __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
871 if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
872 {
873 the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
874 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
875 }
876 else if (__glibc_likely (the_map->l_tls_offset
877 != FORCED_DYNAMIC_TLS_OFFSET))
878 {
879 #if TLS_TCB_AT_TP
880 void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
881 #elif TLS_DTV_AT_TP
882 void *p = (char *) THREAD_SELF + the_map->l_tls_offset + TLS_PRE_TCB_SIZE;
883 #else
884 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
885 #endif
886 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
887
888 dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
889 dtv[GET_ADDR_MODULE].pointer.val = p;
890
891 return (char *) p + GET_ADDR_OFFSET;
892 }
893 else
894 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
895 }
896 struct dtv_pointer result = allocate_and_init (the_map);
897 dtv[GET_ADDR_MODULE].pointer = result;
898 assert (result.to_free != NULL);
899
900 return (char *) result.val + GET_ADDR_OFFSET;
901 }
902
903
904 static struct link_map *
905 __attribute_noinline__
906 update_get_addr (GET_ADDR_ARGS)
907 {
908 struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
909 dtv_t *dtv = THREAD_DTV ();
910
911 void *p = dtv[GET_ADDR_MODULE].pointer.val;
912
913 if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
914 return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);
915
916 return (void *) p + GET_ADDR_OFFSET;
917 }
918
919 /* For all machines that have a non-macro version of __tls_get_addr, we
920 want to use rtld_hidden_proto/rtld_hidden_def in order to call the
921 internal alias for __tls_get_addr from ld.so. This avoids a PLT entry
922 in ld.so for __tls_get_addr. */
923
924 #ifndef __tls_get_addr
925 extern void * __tls_get_addr (GET_ADDR_ARGS);
926 rtld_hidden_proto (__tls_get_addr)
927 rtld_hidden_def (__tls_get_addr)
928 #endif
929
930 /* The generic dynamic and local dynamic model cannot be used in
931 statically linked applications. */
932 void *
933 __tls_get_addr (GET_ADDR_ARGS)
934 {
935 dtv_t *dtv = THREAD_DTV ();
936
937 /* Update is needed if dtv[0].counter < the generation of the accessed
938 module. The global generation counter is used here as it is easier
939 to check. Synchronization for the relaxed MO access is guaranteed
940 by user code, see CONCURRENCY NOTES in _dl_update_slotinfo. */
941 size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
942 if (__glibc_unlikely (dtv[0].counter != gen))
943 return update_get_addr (GET_ADDR_PARAM);
944
945 void *p = dtv[GET_ADDR_MODULE].pointer.val;
946
947 if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
948 return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
949
950 return (char *) p + GET_ADDR_OFFSET;
951 }
952 #endif
953
954
955 /* Look up the module's TLS block as for __tls_get_addr,
956 but never touch anything. Return null if it's not allocated yet. */
957 void *
958 _dl_tls_get_addr_soft (struct link_map *l)
959 {
960 if (__glibc_unlikely (l->l_tls_modid == 0))
961 /* This module has no TLS segment. */
962 return NULL;
963
964 dtv_t *dtv = THREAD_DTV ();
965 /* This may be called without holding the GL(dl_load_tls_lock). Reading
966 an arbitrary gen value is fine since this is best-effort code. */
967 size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
968 if (__glibc_unlikely (dtv[0].counter != gen))
969 {
970 /* This thread's DTV is not completely current,
971 but it might already cover this module. */
972
973 if (l->l_tls_modid >= dtv[-1].counter)
974 /* Nope. */
975 return NULL;
976
977 size_t idx = l->l_tls_modid;
978 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
979 while (idx >= listp->len)
980 {
981 idx -= listp->len;
982 listp = listp->next;
983 }
984
985 /* We've reached the slot for this module.
986 If its generation counter is higher than the DTV's,
987 this thread does not know about this module yet. */
988 if (dtv[0].counter < listp->slotinfo[idx].gen)
989 return NULL;
990 }
991
992 void *data = dtv[l->l_tls_modid].pointer.val;
993 if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
994 /* The DTV is current, but this thread has not yet needed
995 to allocate this module's segment. */
996 data = NULL;
997
998 return data;
999 }
1000
1001
1002 void
1003 _dl_add_to_slotinfo (struct link_map *l, bool do_add)
1004 {
1005 /* Now that we know the object is loaded successfully, add
1006 modules containing TLS data to the dtv info table. We
1007 might have to increase its size. */
1008 struct dtv_slotinfo_list *listp;
1009 struct dtv_slotinfo_list *prevp;
1010 size_t idx = l->l_tls_modid;
1011
1012 /* Find the place in the dtv slotinfo list. */
1013 listp = GL(dl_tls_dtv_slotinfo_list);
1014 prevp = NULL; /* Needed to shut up gcc. */
1015 do
1016 {
1017 /* Does it fit in the array of this list element? */
1018 if (idx < listp->len)
1019 break;
1020 idx -= listp->len;
1021 prevp = listp;
1022 listp = listp->next;
1023 }
1024 while (listp != NULL);
1025
1026 if (listp == NULL)
1027 {
1028 /* When we come here it means we have to add a new element
1029 to the slotinfo list. And the new module must be in
1030 the first slot. */
1031 assert (idx == 0);
1032
1033 listp = (struct dtv_slotinfo_list *)
1034 malloc (sizeof (struct dtv_slotinfo_list)
1035 + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1036 if (listp == NULL)
1037 {
1038 /* We ran out of memory while resizing the dtv slotinfo list. */
1039 _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
1040 cannot create TLS data structures"));
1041 }
1042
1043 listp->len = TLS_SLOTINFO_SURPLUS;
1044 listp->next = NULL;
1045 memset (listp->slotinfo, '\0',
1046 TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1047 /* Synchronize with _dl_update_slotinfo. */
1048 atomic_store_release (&prevp->next, listp);
1049 }
1050
1051 /* Add the information into the slotinfo data structure. */
1052 if (do_add)
1053 {
1054 /* Can be read concurrently. See _dl_update_slotinfo. */
1055 atomic_store_relaxed (&listp->slotinfo[idx].map, l);
1056 atomic_store_relaxed (&listp->slotinfo[idx].gen,
1057 GL(dl_tls_generation) + 1);
1058 }
1059 }
1060
1061 #if PTHREAD_IN_LIBC
1062 static inline void __attribute__((always_inline))
1063 init_one_static_tls (struct pthread *curp, struct link_map *map)
1064 {
1065 # if TLS_TCB_AT_TP
1066 void *dest = (char *) curp - map->l_tls_offset;
1067 # elif TLS_DTV_AT_TP
1068 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1069 # else
1070 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1071 # endif
1072
1073 /* Initialize the memory. */
1074 memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1075 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1076 }
1077
1078 void
1079 _dl_init_static_tls (struct link_map *map)
1080 {
1081 lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1082
1083 /* Iterate over the list with system-allocated threads first. */
1084 list_t *runp;
1085 list_for_each (runp, &GL (dl_stack_used))
1086 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1087
1088 /* Now the list with threads using user-allocated stacks. */
1089 list_for_each (runp, &GL (dl_stack_user))
1090 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1091
1092 lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1093 }
1094 #endif /* PTHREAD_IN_LIBC */