]>
Commit | Line | Data |
---|---|---|
3996f34b UD |
1 | /* Profiling of shared libraries. |
2 | Copyright (C) 1997 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Library General Public License as | |
8 | published by the Free Software Foundation; either version 2 of the | |
9 | License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Library General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Library General Public | |
17 | License along with the GNU C Library; see the file COPYING.LIB. If not, | |
18 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | #include <errno.h> | |
22 | #include <fcntl.h> | |
23 | #include <inttypes.h> | |
24 | #include <link.h> | |
25 | #include <stdio.h> | |
26 | #include <stdlib.h> | |
27 | #include <string.h> | |
28 | #include <unistd.h> | |
29 | #include <sys/gmon.h> | |
30 | #include <sys/gmon_out.h> | |
31 | #include <sys/mman.h> | |
32 | #include <sys/stat.h> | |
33 | ||
34 | /* The LD_PROFILE feature has to be implemented different to the | |
35 | normal profiling using the gmon/ functions. The problem is that an | |
36 | arbitrary number of processes can be run simultaneously using | |
37 | profiling and all write the results in the same file. To provide | |
38 | this mechanism one could implement a complicated mechanism to merge | |
39 | the content of two profiling runs or one could extend the file | |
40 | format to allow more than one data set. For the second solution we | |
41 | would have the problem that the file can grow in size beyond any | |
42 | limit and both solutions have the problem that the concurrency of | |
43 | writing the results is a big problem. | |
44 | ||
45 | Another much simpler method is to use mmap to map the same file in | |
46 | all using programs and modify the data in the mmap'ed area and so | |
47 | also automatically on the disk. Using the MAP_SHARED option of | |
48 | mmap(2) this can be done without big problems in more than one | |
49 | process. | |
50 | ||
51 | This approach is very different from the normal profiling. We have | |
52 | to use the profiling data in exactly the way they are expected to | |
0413b54c UD |
53 | be written to disk. But the normal format used by gprof is not usable |
54 | to do this. It is optimized for size. It writes the tags as single | |
55 | bytes but this means that the following 32/64 bit values are | |
56 | unaligned. | |
57 | ||
58 | Therefore we use a new format. This will look like this | |
59 | ||
60 | 0 1 2 3 <- byte in 32 bit word | |
61 | 0000 g m o n | |
62 | 0004 *version* <- GMON_SHOBJ_VERSION | |
63 | 0008 00 00 00 00 | |
64 | 000c 00 00 00 00 | |
65 | 0010 00 00 00 00 | |
66 | ||
67 | 0014 *tag* <- GMON_TAG_TIME_HIST | |
68 | 0018 ?? ?? ?? ?? | |
69 | ?? ?? ?? ?? <- 32/64 bit LowPC | |
70 | 0018+A ?? ?? ?? ?? | |
71 | ?? ?? ?? ?? <- 32/64 bit HighPC | |
72 | 0018+2*A *histsize* | |
73 | 001c+2*A *profrate* | |
74 | 0020+2*A s e c o | |
75 | 0024+2*A n d s \0 | |
76 | 0028+2*A \0 \0 \0 \0 | |
77 | 002c+2*A \0 \0 \0 | |
78 | 002f+2*A s | |
79 | ||
80 | 0030+2*A ?? ?? ?? ?? <- Count data | |
81 | ... ... | |
82 | 0030+2*A+K ?? ?? ?? ?? | |
83 | ||
84 | 0030+2*A+K *tag* <- GMON_TAG_CG_ARC | |
85 | 0034+2*A+K *lastused* | |
86 | 0038+2*A+K ?? ?? ?? ?? | |
87 | ?? ?? ?? ?? <- FromPC#1 | |
88 | 0038+3*A+K ?? ?? ?? ?? | |
89 | ?? ?? ?? ?? <- ToPC#1 | |
90 | 0038+4*A+K ?? ?? ?? ?? <- Count#1 | |
91 | ... ... ... | |
92 | 0038+(2*(CN-1)+2)*A+(CN-1)*4+K ?? ?? ?? ?? | |
93 | ?? ?? ?? ?? <- FromPC#CGN | |
94 | 0038+(2*(CN-1)+3)*A+(CN-1)*4+K ?? ?? ?? ?? | |
95 | ?? ?? ?? ?? <- ToPC#CGN | |
96 | 0038+(2*CN+2)*A+(CN-1)*4+K ?? ?? ?? ?? <- Count#CGN | |
97 | ||
98 | We put (for now?) no basic block information in the file since this would | |
99 | introduce race conditions among all the processes which want to write them. | |
100 | ||
101 | `K' is the number of count entries which is computed as | |
102 | ||
103 | textsize / HISTFRACTION | |
104 | ||
105 | `CN' in the above table is the number of call graph arcs. Normally, | |
106 | the table is sparse and the profiling code writes out only those | |
107 | entries which are really used in the program run. But since we must | |
108 | not extend this table (the profiling file) we'll keep them all here. | |
109 | So CN can be computed in advance as | |
110 | ||
111 | MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS | |
112 | ||
113 | Now the remaining question is: how to build the data structures we can | |
114 | work with from this data. We need the from set and must associate the | |
115 | froms with all the associated tos. We will do this by constructing these | |
116 | data structures at the program start. To do this we'll simply visit all | |
117 | entries in the call graph table and add it to the appropriate list. */ | |
3996f34b UD |
118 | |
/* Declared here by hand because no header in the include list provides
   it.  _dl_start_profile passes an error number, a scratch buffer and
   the buffer's size and prints the returned string as part of an error
   message.  NOTE(review): presumably a reentrant strerror variant;
   confirm against its definition.  */
extern char *_strerror_internal __P ((int, char *buf, size_t));

/* Returns the value _dl_start_profile stores in the `prof_rate' field
   of the histogram header.  */
extern int __profile_frequency __P ((void));
122 | ||
3996f34b UD |
/* We define a special type to address the elements of the arc table.
   This is basically the `gmon_cg_arc_record' format but it uses real
   integer types instead of byte arrays.  The structure is packed so
   that the records follow each other in the file without padding.

   NOTE(review): this comment used to say the type includes room for
   the tag, but no tag member is declared here.  */
struct here_cg_arc_record
  {
    uintptr_t from_pc;          /* FromPC of the arc.  */
    uintptr_t self_pc;          /* ToPC of the arc.  */
    uint32_t count;             /* Count of the arc.  */
  } __attribute__ ((packed));
3996f34b UD |
132 | |
/* Start of the arc record table inside the mmaped profile file.  */
static struct here_cg_arc_record *data;

/* This is the number of entries which have been incorporated in the
   toset.  */
static uint32_t narcs;
/* This is a pointer to the object representing the number of entries
   currently in the mmaped file.  The value it points to need not be
   the same as NARCS at any given time.  If it is equal, all entries
   from the file are in our lists.  */
static uint32_t *narcsp;

/* State of the profiler: _dl_start_profile sets it to GMON_PROF_OFF
   while setting up and to GMON_PROF_ON when done; _dl_mcount sets it to
   GMON_PROF_BUSY while it runs.  */
static long int state;

/* Histogram counters inside the mmaped file and their size in bytes.  */
static volatile uint16_t *kcount;
static size_t kcountsize;
148 | ||
/* One element of the TOS array built by _dl_start_profile.  HERE points
   to the arc record in the mmaped file.  LINK is the TOS index that was
   stored in the arc's FROMS slot before this element was inserted, so
   all elements sharing a slot form a chain.  */
struct here_tostruct
  {
    struct here_cg_arc_record volatile *here;
    uint16_t link;
  };
154 | ||
/* Table indexed by (from_pc - lowpc) / (hashfraction * sizeof (*froms)).
   Each slot holds the TOS index of the element inserted last for that
   slot.  FROMSSIZE is the size of the table in bytes.  */
static uint16_t *froms;
static size_t fromssize;

/* Array of list elements, allocated directly behind FROMS.  TOSSIZE is
   its size in bytes, TOLIMIT the number of elements it can hold, and
   TOIDX the index of the next unused element.  */
static struct here_tostruct *tos;
static size_t tossize;
static size_t tolimit;
static size_t toidx;

/* Address range of the profiled code, rounded to the histogram
   granularity, and its length (highpc - lowpc).  */
static uintptr_t lowpc;
static uintptr_t highpc;
static size_t textsize;
/* HASHFRACTION and, if that is a power of two, the shift amount which
   replaces the division by hashfraction * sizeof (*froms).  */
static unsigned int hashfraction;
static unsigned int log_hashfraction;

/* This is the information about the mmaped memory.  */
static struct gmon_hdr *addr;
static off_t expected_size;
172 | ||
173 | \f | |
174 | /* Set up profiling data to profile object desribed by MAP. The output | |
175 | file is found (or created) in OUTPUT_DIR. */ | |
3996f34b UD |
176 | void |
177 | _dl_start_profile (struct link_map *map, const char *output_dir) | |
178 | { | |
179 | char *filename; | |
180 | int fd; | |
181 | struct stat st; | |
182 | const ElfW(Phdr) *ph; | |
183 | ElfW(Addr) mapstart = ~((ElfW(Addr)) 0); | |
184 | ElfW(Addr) mapend = 0; | |
3996f34b UD |
185 | struct gmon_hdr gmon_hdr; |
186 | struct gmon_hist_hdr hist_hdr; | |
3996f34b | 187 | char *hist; |
0413b54c | 188 | size_t idx; |
3996f34b UD |
189 | |
190 | /* Compute the size of the sections which contain program code. */ | |
191 | for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph) | |
192 | if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X)) | |
193 | { | |
194 | ElfW(Addr) start = (ph->p_vaddr & ~(_dl_pagesize - 1)); | |
195 | ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + _dl_pagesize - 1) | |
196 | & ~(_dl_pagesize - 1)); | |
197 | ||
198 | if (start < mapstart) | |
199 | mapstart = start; | |
200 | if (end > mapend) | |
201 | mapend = end; | |
202 | } | |
203 | ||
204 | /* Now we can compute the size of the profiling data. This is done | |
205 | with the same formulars as in `monstartup' (see gmon.c). */ | |
0413b54c UD |
206 | state = GMON_PROF_OFF; |
207 | lowpc = ROUNDDOWN (mapstart + map->l_addr, | |
208 | HISTFRACTION * sizeof(HISTCOUNTER)); | |
209 | highpc = ROUNDUP (mapend + map->l_addr, | |
210 | HISTFRACTION * sizeof(HISTCOUNTER)); | |
211 | textsize = highpc - lowpc; | |
212 | kcountsize = textsize / HISTFRACTION; | |
213 | hashfraction = HASHFRACTION; | |
3996f34b UD |
214 | if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) |
215 | /* If HASHFRACTION is a power of two, mcount can use shifting | |
216 | instead of integer division. Precompute shift amount. */ | |
0413b54c UD |
217 | log_hashfraction = __builtin_ffs (hashfraction * sizeof (*froms)) - 1; |
218 | else | |
219 | log_hashfraction = -1; | |
220 | fromssize = textsize / HASHFRACTION; | |
221 | tolimit = textsize * ARCDENSITY / 100; | |
222 | if (tolimit < MINARCS) | |
223 | tolimit = MINARCS; | |
224 | if (tolimit > MAXARCS) | |
225 | tolimit = MAXARCS; | |
226 | tossize = tolimit * sizeof (struct here_tostruct); | |
3996f34b UD |
227 | |
228 | expected_size = (sizeof (struct gmon_hdr) | |
0413b54c UD |
229 | + 4 + sizeof (struct gmon_hist_hdr) + kcountsize |
230 | + 4 + 4 + tossize * sizeof (struct here_cg_arc_record)); | |
3996f34b UD |
231 | |
232 | /* Create the gmon_hdr we expect or write. */ | |
233 | memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr)); | |
234 | memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie)); | |
0413b54c | 235 | *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION; |
3996f34b UD |
236 | |
237 | /* Create the hist_hdr we expect or write. */ | |
238 | *(char **) hist_hdr.low_pc = (char *) mapstart; | |
239 | *(char **) hist_hdr.high_pc = (char *) mapend; | |
0413b54c | 240 | *(int32_t *) hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER); |
3996f34b UD |
241 | *(int32_t *) hist_hdr.prof_rate = __profile_frequency (); |
242 | strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); | |
243 | hist_hdr.dimen_abbrev = 's'; | |
244 | ||
245 | /* First determine the output name. We write in the directory | |
246 | OUTPUT_DIR and the name is composed from the shared objects | |
247 | soname (or the file name) and the ending ".profile". */ | |
248 | filename = (char *) alloca (strlen (output_dir) + 1 + strlen (_dl_profile) | |
249 | + sizeof ".profile"); | |
250 | __stpcpy (__stpcpy (__stpcpy (__stpcpy (filename, output_dir), "/"), | |
251 | _dl_profile), | |
252 | ".profile"); | |
253 | ||
254 | fd = __open (filename, O_RDWR | O_CREAT, 0666); | |
255 | if (fd == -1) | |
256 | /* We cannot write the profiling data so don't do anthing. */ | |
257 | return; | |
258 | ||
259 | if (fstat (fd, &st) < 0 || !S_ISREG (st.st_mode)) | |
260 | { | |
261 | /* Not stat'able or not a regular file => don't use it. */ | |
262 | close (fd); | |
263 | return; | |
264 | } | |
265 | ||
266 | /* Test the size. If it does not match what we expect from the size | |
267 | values in the map MAP we don't use it and warn the user. */ | |
268 | if (st.st_size == 0) | |
269 | { | |
270 | /* We have to create the file. */ | |
271 | char buf[_dl_pagesize]; | |
272 | ||
273 | memset (buf, '\0', _dl_pagesize); | |
274 | ||
275 | if (__lseek (fd, expected_size & ~(_dl_pagesize - 1), SEEK_SET) == -1) | |
276 | { | |
277 | char buf[400]; | |
278 | int errnum; | |
279 | cannot_create: | |
280 | errnum = errno; | |
281 | __close (fd); | |
8f2ece69 UD |
282 | _dl_sysdep_error (filename, ": cannot create file: ", |
283 | _strerror_internal (errnum, buf, sizeof buf), | |
284 | "\n", NULL); | |
3996f34b UD |
285 | return; |
286 | } | |
287 | ||
288 | if (TEMP_FAILURE_RETRY (__write (fd, buf, (expected_size | |
289 | & (_dl_pagesize - 1)))) < 0) | |
290 | goto cannot_create; | |
291 | } | |
292 | else if (st.st_size != expected_size) | |
293 | { | |
294 | __close (fd); | |
295 | wrong_format: | |
0413b54c UD |
296 | |
297 | if (addr != NULL) | |
298 | __munmap ((void *) addr, expected_size); | |
299 | ||
8f2ece69 UD |
300 | _dl_sysdep_error (filename, |
301 | ": file is no correct profile data file for `", | |
302 | _dl_profile, "'\n", NULL); | |
3996f34b UD |
303 | return; |
304 | } | |
305 | ||
0413b54c UD |
306 | addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE, |
307 | MAP_SHARED|MAP_FILE, fd, 0); | |
308 | if (addr == (struct gmon_hdr *) MAP_FAILED) | |
3996f34b UD |
309 | { |
310 | char buf[400]; | |
311 | int errnum = errno; | |
312 | __close (fd); | |
8f2ece69 UD |
313 | _dl_sysdep_error (filename, ": cannot map file: ", |
314 | _strerror_internal (errnum, buf, sizeof buf), | |
315 | "\n", NULL); | |
3996f34b UD |
316 | return; |
317 | } | |
318 | ||
319 | /* We don't need the file desriptor anymore. */ | |
320 | __close (fd); | |
321 | ||
322 | /* Pointer to data after the header. */ | |
323 | hist = (char *) (addr + 1); | |
0413b54c UD |
324 | kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t) |
325 | + sizeof (struct gmon_hist_hdr)); | |
3996f34b UD |
326 | |
327 | /* Compute pointer to array of the arc information. */ | |
0413b54c UD |
328 | data = (struct here_cg_arc_record *) ((char *) kcount + kcountsize |
329 | + 2 * sizeof (uint32_t)); | |
330 | narcsp = (uint32_t *) (hist + sizeof (uint32_t) | |
331 | + sizeof (struct gmon_hist_hdr) + sizeof (uint32_t)); | |
3996f34b UD |
332 | |
333 | if (st.st_size == 0) | |
334 | { | |
335 | /* Create the signature. */ | |
3996f34b UD |
336 | memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr)); |
337 | ||
0413b54c UD |
338 | *(uint32_t *) hist = GMON_TAG_TIME_HIST; |
339 | memcpy (hist + sizeof (uint32_t), &hist_hdr, | |
340 | sizeof (struct gmon_hist_hdr)); | |
3996f34b | 341 | |
0413b54c UD |
342 | *(uint32_t *) (hist + sizeof (uint32_t) + sizeof (struct gmon_hist_hdr) |
343 | + kcountsize) = GMON_TAG_CG_ARC; | |
3996f34b UD |
344 | } |
345 | else | |
346 | { | |
347 | /* Test the signature in the file. */ | |
348 | if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0 | |
0413b54c UD |
349 | || *(uint32_t *) hist != GMON_TAG_TIME_HIST |
350 | || memcmp (hist + sizeof (uint32_t), &hist_hdr, | |
351 | sizeof (struct gmon_hist_hdr)) != 0 | |
352 | || (*(uint32_t *) (hist + sizeof (uint32_t) | |
353 | + sizeof (struct gmon_hist_hdr) + kcountsize) | |
354 | != GMON_TAG_CG_ARC)) | |
3996f34b UD |
355 | goto wrong_format; |
356 | } | |
357 | ||
0413b54c UD |
358 | /* Allocate memory for the froms data and the pointer to the tos records. */ |
359 | froms = (uint16_t *) calloc (fromssize + tossize, 1); | |
360 | if (froms == NULL) | |
361 | { | |
362 | __munmap ((void *) addr, expected_size); | |
363 | _dl_sysdep_fatal ("Out of memory while initializing profiler", NULL); | |
364 | /* NOTREACHED */ | |
365 | } | |
366 | ||
367 | tos = (struct here_tostruct *) ((char *) froms + fromssize); | |
368 | toidx = 0; | |
369 | ||
370 | /* Now we have to process all the arc count entries. BTW: it is | |
371 | not critical whether the *NARCSP value changes meanwhile. Before | |
372 | we enter a new entry in to toset we will check that everything is | |
373 | available in TOS. This happens in _dl_mcount. | |
374 | ||
375 | Loading the entries in reverse order should help to get the most | |
376 | frequently used entries at the front of the list. */ | |
377 | for (idx = narcs = *narcsp; idx > 0; ) | |
378 | { | |
379 | size_t from_index; | |
380 | size_t newtoidx; | |
381 | --idx; | |
382 | from_index = ((data[idx].from_pc - lowpc) | |
383 | / (hashfraction * sizeof (*froms))); | |
384 | newtoidx = toidx++; | |
385 | tos[newtoidx].here = &data[idx]; | |
386 | tos[newtoidx].link = froms[from_index]; | |
387 | froms[from_index] = newtoidx; | |
388 | } | |
389 | ||
3996f34b | 390 | /* Turn on profiling. */ |
0413b54c | 391 | state = GMON_PROF_ON; |
3996f34b UD |
392 | } |
393 | ||
394 | ||
395 | void | |
396 | _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc) | |
397 | { | |
0413b54c | 398 | if (state != GMON_PROF_ON) |
3996f34b | 399 | return; |
0413b54c | 400 | state = GMON_PROF_BUSY; |
3996f34b UD |
401 | |
402 | /* Compute relative addresses. The shared object can be loaded at | |
403 | any address. The value of frompc could be anything. We cannot | |
404 | restrict it in any way, just set to a fixed value (0) in case it | |
405 | is outside the allowed range. These calls show up as calls from | |
406 | <external> in the gprof output. */ | |
0413b54c UD |
407 | frompc -= lowpc; |
408 | if (frompc >= textsize) | |
3996f34b | 409 | frompc = 0; |
0413b54c UD |
410 | selfpc -= lowpc; |
411 | if (selfpc >= textsize) | |
412 | goto done; | |
413 | ||
3996f34b | 414 | |
0413b54c UD |
415 | done: |
416 | state = GMON_PROF_ON; | |
3996f34b | 417 | } |