]> sourceware.org Git - glibc.git/blame - elf/dl-profile.c
Update.
[glibc.git] / elf / dl-profile.c
CommitLineData
3996f34b
UD
1/* Profiling of shared libraries.
2 Copyright (C) 1997 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
24#include <link.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29#include <sys/gmon.h>
30#include <sys/gmon_out.h>
31#include <sys/mman.h>
32#include <sys/stat.h>
33
34/* The LD_PROFILE feature has to be implemented differently from the
35 normal profiling using the gmon/ functions. The problem is that an
36 arbitrary number of processes can simultaneously be run using
37 profiling and all write the results in the same file. To provide
38 this mechanism one could implement a complicated mechanism to merge
39 the content of two profiling runs or one could extend the file
40 format to allow more than one data set. For the second solution we
41 would have the problem that the file can grow in size beyond any
42 limit and both solutions have the problem that the concurrency of
43 writing the results is a big problem.
44
45 Another much simpler method is to use mmap to map the same file in
46 all using programs and modify the data in the mmap'ed area and so
47 also automatically on the disk. Using the MAP_SHARED option of
48 mmap(2) this can be done without big problems in more than one
49 file.
50
51 This approach is very different from the normal profiling. We have
52 to use the profiling data in exactly the way they are expected to
0413b54c
UD
53 be written to disk. But the normal format used by gprof is not usable
54 to do this. It is optimized for size. It writes the tags as single
55 bytes but this means that the following 32/64 bit values are
56 unaligned.
57
58 Therefore we use a new format. This will look like this
59
60 0 1 2 3 <- byte is 32 bit word
61 0000 g m o n
62 0004 *version* <- GMON_SHOBJ_VERSION
63 0008 00 00 00 00
64 000c 00 00 00 00
65 0010 00 00 00 00
66
67 0014 *tag* <- GMON_TAG_TIME_HIST
68 0018 ?? ?? ?? ??
69 ?? ?? ?? ?? <- 32/64 bit LowPC
70 0018+A ?? ?? ?? ??
71 ?? ?? ?? ?? <- 32/64 bit HighPC
72 0018+2*A *histsize*
73 001c+2*A *profrate*
74 0020+2*A s e c o
75 0024+2*A n d s \0
76 0028+2*A \0 \0 \0 \0
77 002c+2*A \0 \0 \0
78 002f+2*A s
79
80 0030+2*A ?? ?? ?? ?? <- Count data
81 ... ...
82 0030+2*A+K ?? ?? ?? ??
83
84 0030+2*A+K *tag* <- GMON_TAG_CG_ARC
85 0034+2*A+K *lastused*
86 0038+2*A+K ?? ?? ?? ??
87 ?? ?? ?? ?? <- FromPC#1
88 0038+3*A+K ?? ?? ?? ??
89 ?? ?? ?? ?? <- ToPC#1
90 0038+4*A+K ?? ?? ?? ?? <- Count#1
91 ... ... ...
92 0038+(2*(CN-1)+2)*A+(CN-1)*4+K ?? ?? ?? ??
93 ?? ?? ?? ?? <- FromPC#CGN
94 0038+(2*(CN-1)+3)*A+(CN-1)*4+K ?? ?? ?? ??
95 ?? ?? ?? ?? <- ToPC#CGN
96 0038+(2*CN+2)*A+(CN-1)*4+K ?? ?? ?? ?? <- Count#CGN
97
98 We put (for now) no basic block information in the file since this would
99 introduce race conditions among all the processes which want to write them.
100
101 `K' is the number of count entries which is computed as
102
103 textsize / HISTFRACTION
104
105 `CN' in the above table is the number of call graph arcs. Normally,
106 the table is sparse and the profiling code writes out only those
107 entries which are really used in the program run. But since we must
108 not extend this table (the profiling file) we'll keep them all here.
109 So CN can be computed in advance as
110
111 MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS
112
113 Now the remaining question is: how to build the data structures we can
114 work with from this data. We need the from set and must associate the
115 froms with all the associated tos. We will do this by constructing these
116 data structures at the program start. To do this we'll simply visit all
117 entries in the call graph table and add it to the appropriate list. */
3996f34b
UD
118
119extern char *_strerror_internal __P ((int, char *buf, size_t));
120
121extern int __profile_frequency __P ((void));
122
3996f34b
UD
/* One element of the call graph arc table as it is stored in the
   profile data file: the address of the caller, the address of the
   callee and the number of times this arc was traversed.  The structure
   is packed so that consecutive records follow each other without any
   padding; the size of one record is therefore exactly
   2 * sizeof (uintptr_t) + sizeof (uint32_t).  */
struct here_cg_arc_record
  {
    uintptr_t from_pc;	/* Address the call came from.  */
    uintptr_t self_pc;	/* Address of the called function.  */
    uint32_t count;	/* Number of times the arc was used.  */
  } __attribute__ ((packed));
3996f34b
UD
132
/* Start of the arc records inside the mmap'ed profile data file.  Set
   by _dl_start_profile.  */
static struct here_cg_arc_record *data;

/* This is the number of entries which have been incorporated in the
   toset.  */
static uint32_t narcs;
/* This is a pointer to the object representing the number of entries
   currently in the mmap'ed file.  At no point of time this has to be the
   same as NARCS.  If it is equal all entries from the file are in our
   lists.  */
static uint32_t *narcsp;

/* State of the profiler for the currently profiled object (one of the
   GMON_PROF_* values).  */
static long int state;

/* Histogram counters inside the mmap'ed file and their total size in
   bytes.  */
static volatile uint16_t *kcount;
static size_t kcountsize;

/* In-memory list element which refers to one arc record of the mmap'ed
   file.  */
struct here_tostruct
  {
    /* The arc record in the file this element stands for.  */
    struct here_cg_arc_record volatile *here;
    /* Index in TOS of the next element of the same chain.
       NOTE(review): only 16 bits wide although the number of elements
       is bounded by MAXARCS -- confirm that this cannot truncate.  */
    uint16_t link;
  };

/* Table indexed by the hashed caller address; each slot holds the index
   in TOS of the first element of the chain.  FROMSSIZE is the size of
   the table in bytes.  */
static uint16_t *froms;
static size_t fromssize;

/* Array of list elements.  It lives in the same allocation as FROMS,
   directly behind it.  TOSSIZE is its size in bytes, TOLIMIT the number
   of elements it has room for and TOIDX the index of the next unused
   element.  */
static struct here_tostruct *tos;
static size_t tossize;
static size_t tolimit;
static size_t toidx;

/* Address range of the executable segments of the profiled object
   (rounded to the histogram granularity) and the derived hashing
   parameters.  */
static uintptr_t lowpc;
static uintptr_t highpc;
static size_t textsize;
static unsigned int hashfraction;
static unsigned int log_hashfraction;

/* This is the information about the mmap'ed memory.  */
static struct gmon_hdr *addr;
static off_t expected_size;
172
173\f
174/* Set up profiling data to profile object desribed by MAP. The output
175 file is found (or created) in OUTPUT_DIR. */
3996f34b
UD
176void
177_dl_start_profile (struct link_map *map, const char *output_dir)
178{
179 char *filename;
180 int fd;
181 struct stat st;
182 const ElfW(Phdr) *ph;
183 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
184 ElfW(Addr) mapend = 0;
3996f34b
UD
185 struct gmon_hdr gmon_hdr;
186 struct gmon_hist_hdr hist_hdr;
3996f34b 187 char *hist;
0413b54c 188 size_t idx;
3996f34b
UD
189
190 /* Compute the size of the sections which contain program code. */
191 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
192 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
193 {
194 ElfW(Addr) start = (ph->p_vaddr & ~(_dl_pagesize - 1));
195 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + _dl_pagesize - 1)
196 & ~(_dl_pagesize - 1));
197
198 if (start < mapstart)
199 mapstart = start;
200 if (end > mapend)
201 mapend = end;
202 }
203
204 /* Now we can compute the size of the profiling data. This is done
205 with the same formulars as in `monstartup' (see gmon.c). */
0413b54c
UD
206 state = GMON_PROF_OFF;
207 lowpc = ROUNDDOWN (mapstart + map->l_addr,
208 HISTFRACTION * sizeof(HISTCOUNTER));
209 highpc = ROUNDUP (mapend + map->l_addr,
210 HISTFRACTION * sizeof(HISTCOUNTER));
211 textsize = highpc - lowpc;
212 kcountsize = textsize / HISTFRACTION;
213 hashfraction = HASHFRACTION;
3996f34b
UD
214 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
215 /* If HASHFRACTION is a power of two, mcount can use shifting
216 instead of integer division. Precompute shift amount. */
0413b54c
UD
217 log_hashfraction = __builtin_ffs (hashfraction * sizeof (*froms)) - 1;
218 else
219 log_hashfraction = -1;
220 fromssize = textsize / HASHFRACTION;
221 tolimit = textsize * ARCDENSITY / 100;
222 if (tolimit < MINARCS)
223 tolimit = MINARCS;
224 if (tolimit > MAXARCS)
225 tolimit = MAXARCS;
226 tossize = tolimit * sizeof (struct here_tostruct);
3996f34b
UD
227
228 expected_size = (sizeof (struct gmon_hdr)
0413b54c
UD
229 + 4 + sizeof (struct gmon_hist_hdr) + kcountsize
230 + 4 + 4 + tossize * sizeof (struct here_cg_arc_record));
3996f34b
UD
231
232 /* Create the gmon_hdr we expect or write. */
233 memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
234 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
0413b54c 235 *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
3996f34b
UD
236
237 /* Create the hist_hdr we expect or write. */
238 *(char **) hist_hdr.low_pc = (char *) mapstart;
239 *(char **) hist_hdr.high_pc = (char *) mapend;
0413b54c 240 *(int32_t *) hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER);
3996f34b
UD
241 *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
242 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
243 hist_hdr.dimen_abbrev = 's';
244
245 /* First determine the output name. We write in the directory
246 OUTPUT_DIR and the name is composed from the shared objects
247 soname (or the file name) and the ending ".profile". */
248 filename = (char *) alloca (strlen (output_dir) + 1 + strlen (_dl_profile)
249 + sizeof ".profile");
250 __stpcpy (__stpcpy (__stpcpy (__stpcpy (filename, output_dir), "/"),
251 _dl_profile),
252 ".profile");
253
254 fd = __open (filename, O_RDWR | O_CREAT, 0666);
255 if (fd == -1)
256 /* We cannot write the profiling data so don't do anthing. */
257 return;
258
259 if (fstat (fd, &st) < 0 || !S_ISREG (st.st_mode))
260 {
261 /* Not stat'able or not a regular file => don't use it. */
262 close (fd);
263 return;
264 }
265
266 /* Test the size. If it does not match what we expect from the size
267 values in the map MAP we don't use it and warn the user. */
268 if (st.st_size == 0)
269 {
270 /* We have to create the file. */
271 char buf[_dl_pagesize];
272
273 memset (buf, '\0', _dl_pagesize);
274
275 if (__lseek (fd, expected_size & ~(_dl_pagesize - 1), SEEK_SET) == -1)
276 {
277 char buf[400];
278 int errnum;
279 cannot_create:
280 errnum = errno;
281 __close (fd);
8f2ece69
UD
282 _dl_sysdep_error (filename, ": cannot create file: ",
283 _strerror_internal (errnum, buf, sizeof buf),
284 "\n", NULL);
3996f34b
UD
285 return;
286 }
287
288 if (TEMP_FAILURE_RETRY (__write (fd, buf, (expected_size
289 & (_dl_pagesize - 1)))) < 0)
290 goto cannot_create;
291 }
292 else if (st.st_size != expected_size)
293 {
294 __close (fd);
295 wrong_format:
0413b54c
UD
296
297 if (addr != NULL)
298 __munmap ((void *) addr, expected_size);
299
8f2ece69
UD
300 _dl_sysdep_error (filename,
301 ": file is no correct profile data file for `",
302 _dl_profile, "'\n", NULL);
3996f34b
UD
303 return;
304 }
305
0413b54c
UD
306 addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE,
307 MAP_SHARED|MAP_FILE, fd, 0);
308 if (addr == (struct gmon_hdr *) MAP_FAILED)
3996f34b
UD
309 {
310 char buf[400];
311 int errnum = errno;
312 __close (fd);
8f2ece69
UD
313 _dl_sysdep_error (filename, ": cannot map file: ",
314 _strerror_internal (errnum, buf, sizeof buf),
315 "\n", NULL);
3996f34b
UD
316 return;
317 }
318
319 /* We don't need the file desriptor anymore. */
320 __close (fd);
321
322 /* Pointer to data after the header. */
323 hist = (char *) (addr + 1);
0413b54c
UD
324 kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
325 + sizeof (struct gmon_hist_hdr));
3996f34b
UD
326
327 /* Compute pointer to array of the arc information. */
0413b54c
UD
328 data = (struct here_cg_arc_record *) ((char *) kcount + kcountsize
329 + 2 * sizeof (uint32_t));
330 narcsp = (uint32_t *) (hist + sizeof (uint32_t)
331 + sizeof (struct gmon_hist_hdr) + sizeof (uint32_t));
3996f34b
UD
332
333 if (st.st_size == 0)
334 {
335 /* Create the signature. */
3996f34b
UD
336 memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr));
337
0413b54c
UD
338 *(uint32_t *) hist = GMON_TAG_TIME_HIST;
339 memcpy (hist + sizeof (uint32_t), &hist_hdr,
340 sizeof (struct gmon_hist_hdr));
3996f34b 341
0413b54c
UD
342 *(uint32_t *) (hist + sizeof (uint32_t) + sizeof (struct gmon_hist_hdr)
343 + kcountsize) = GMON_TAG_CG_ARC;
3996f34b
UD
344 }
345 else
346 {
347 /* Test the signature in the file. */
348 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
0413b54c
UD
349 || *(uint32_t *) hist != GMON_TAG_TIME_HIST
350 || memcmp (hist + sizeof (uint32_t), &hist_hdr,
351 sizeof (struct gmon_hist_hdr)) != 0
352 || (*(uint32_t *) (hist + sizeof (uint32_t)
353 + sizeof (struct gmon_hist_hdr) + kcountsize)
354 != GMON_TAG_CG_ARC))
3996f34b
UD
355 goto wrong_format;
356 }
357
0413b54c
UD
358 /* Allocate memory for the froms data and the pointer to the tos records. */
359 froms = (uint16_t *) calloc (fromssize + tossize, 1);
360 if (froms == NULL)
361 {
362 __munmap ((void *) addr, expected_size);
363 _dl_sysdep_fatal ("Out of memory while initializing profiler", NULL);
364 /* NOTREACHED */
365 }
366
367 tos = (struct here_tostruct *) ((char *) froms + fromssize);
368 toidx = 0;
369
370 /* Now we have to process all the arc count entries. BTW: it is
371 not critical whether the *NARCSP value changes meanwhile. Before
372 we enter a new entry in to toset we will check that everything is
373 available in TOS. This happens in _dl_mcount.
374
375 Loading the entries in reverse order should help to get the most
376 frequently used entries at the front of the list. */
377 for (idx = narcs = *narcsp; idx > 0; )
378 {
379 size_t from_index;
380 size_t newtoidx;
381 --idx;
382 from_index = ((data[idx].from_pc - lowpc)
383 / (hashfraction * sizeof (*froms)));
384 newtoidx = toidx++;
385 tos[newtoidx].here = &data[idx];
386 tos[newtoidx].link = froms[from_index];
387 froms[from_index] = newtoidx;
388 }
389
3996f34b 390 /* Turn on profiling. */
0413b54c 391 state = GMON_PROF_ON;
3996f34b
UD
392}
393
394
395void
396_dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
397{
0413b54c 398 if (state != GMON_PROF_ON)
3996f34b 399 return;
0413b54c 400 state = GMON_PROF_BUSY;
3996f34b
UD
401
402 /* Compute relative addresses. The shared object can be loaded at
403 any address. The value of frompc could be anything. We cannot
404 restrict it in any way, just set to a fixed value (0) in case it
405 is outside the allowed range. These calls show up as calls from
406 <external> in the gprof output. */
0413b54c
UD
407 frompc -= lowpc;
408 if (frompc >= textsize)
3996f34b 409 frompc = 0;
0413b54c
UD
410 selfpc -= lowpc;
411 if (selfpc >= textsize)
412 goto done;
413
3996f34b 414
0413b54c
UD
415 done:
416 state = GMON_PROF_ON;
3996f34b 417}
This page took 0.066449 seconds and 5 git commands to generate.