]>
Commit | Line | Data |
---|---|---|
2aaf9213 NS |
1 | #!/usr/bin/python3 |
2 | ||
3 | # This script uses tapset/hit-count.stp to profile a specific process | |
4 | # or the kernel. It may take a context width, module path, pid, cmd, and timeout. | |
5 | # It generates folders based on buildid, containing subdirectories | |
6 | # leading to sourcefiles where one may read how many times the pc | |
7 | # was at a certain line in that sourcefile. | |
8 | ||
9 | ||
10 | import argparse | |
11 | import sys | |
12 | import os | |
13 | import re | |
14 | import subprocess | |
15 | import tempfile | |
16 | from collections import defaultdict | |
17 | ||
# Command-line interface.  --pid and --cmd are mutually exclusive ways of
# choosing the systemtap target; the remaining options tune data collection
# and how the annotated sources are emitted.
parser = argparse.ArgumentParser()
pid_cmd_group = parser.add_mutually_exclusive_group()
pid_cmd_group.add_argument(
    "-x", "--pid",
    type=int,
    help='PID for systemtap to target.',
)
pid_cmd_group.add_argument(
    "-c", "--cmd",
    type=str,
    help='Command for systemtap to target.',
)
parser.add_argument(
    '-d',
    metavar="BINARY",
    type=str,
    action='append',
    default=[],
    help='Add symbol information for given binary and its shared libraries.',
)
parser.add_argument(
    "-e", "--events",
    type=str,
    default='timer.profile',
    help='Override the list of profiling probe points.',
)
parser.add_argument(
    "-T", "--timeout",
    type=int,
    help="Exit in 'timeout' seconds.",
)
parser.add_argument(
    "-p", "--print",
    action='store_true',
    help="Print annotated source files to stdout instead of files.",
)
parser.add_argument(
    "-w", "--context-width",
    metavar="WIDTH",
    type=int,
    default=-1,
    help='Limit number of lines of context around each hit. Defaults to unlimited.',
)
parser.add_argument(
    "-s", "--stap",
    metavar="PATH",
    type=str,
    help='Override the path to the stap interpreter.',
)
parser.add_argument(
    "-v", "--verbose",
    action='count',
    default=0,
    help="Increase verbosity.",
)

# Parsed once at import time; the rest of the script reads these globals.
args = parser.parse_args()
verbosity = args.verbose
# Debuginfod server list from the environment (None when the variable is unset).
DB_URLS = os.environ.get("DEBUGINFOD_URLS")
2aaf9213 NS |
33 | |
def vprint(level, *args):
    """Print ``args`` only when the global ``verbosity`` is at least ``level``."""
    if verbosity < level:
        return
    print(*args)
37 | ||
38 | ||
# SystemTap data collector.  The script text is split around the one
# user-configurable piece, the profiling probe point(s) from --events.
_STAP_SCRIPT_HEAD = """
global count
global unknown
global kernel
global user
probe begin {
system("echo Starting stap data collector.") # sent to stdout of stap-profile-annotate process
}
probe """

_STAP_SCRIPT_TAIL = """ {
if (! user_mode()) {
kernel <<< 1
next
}
try {
if (target()==0 || target_set_pid(pid()))
{
buildid = umodbuildid(uaddr());
addr= umodaddr(uaddr());
count[buildid,addr] <<< 1;
user <<< 1
}
}
catch /*(e)*/ { unknown <<< 1 /* printf ("%s", e) */ }
}

probe timer.s(1),end
{
println ("BEGIN");
foreach ( [buildid, addr] in count)
{
c = @count(count[buildid,addr]);
println(buildid, " " , addr, " ", c);
}
println ("END");
delete count
}
probe end,error
{
printf ("Counted %d known userspace hits.\\n", @count(user))
if (@count(kernel))
printf ("Ignored %d kernel hits.\\n", @count(kernel))
if (@count(unknown))
printf ("Ignored %d unknown userspace hits.\\n", @count(unknown))
println("Stopped stap data collector.")
}
"""

# Final script handed to stap via -e.
stap_script = _STAP_SCRIPT_HEAD + args.events + _STAP_SCRIPT_TAIL
85 | ||
86 | # buildid class | |
class BuildIDProfile:
    """Hit counts for one build-id, keyed by relative program-counter address.

    After ``get_sources()`` has run, ``sources`` maps each source path
    reported by eu-addr2line to a ``SourceLineProfile`` holding the per-line
    hit counts for that file.
    """

    def __init__(self, buildid):
        self.counts = defaultdict(int)  # relative pc -> accumulated hits
        self.buildid = buildid
        # Name of the scratch file (created under /tmp/) listing the addresses.
        self.filename = self.buildid + 'addrs.txt'
        self.sources = {}  # source path -> SourceLineProfile

    def __str__(self):
        return "BuildIDProfile(buildid %s) items: %s sources: %s" % (self.buildid, self.counts.items(), self.sources.items())

    def accumulate(self, pc, count):
        """Add ``count`` hits to the relative address ``pc``."""
        self.counts[pc] += count

    @staticmethod
    def _parse_addr2line(entry):
        """Parse one eu-addr2line output line.

        Returns ``(source, line_index)`` where ``line_index`` is zero-based,
        or ``None`` when the line has no ``source:line`` shape or carries no
        line number.
        """
        fields = entry.split(':')
        if len(fields) < 2:
            return None
        # The line field may look like "12 (discriminator 3)"; keep only the
        # leading number.
        m = re.search('[0-9]+', fields[1])
        if m is None:
            return None
        # eu-addr2line numbers lines from 1, SourceLineProfile.report from 0.
        return fields[0], int(m.group(0)) - 1

    def get_sources(self):
        """Resolve every recorded address to source:line and fill ``sources``.

        The addresses are dumped to /tmp/<filename>, resolved through
        ``get_debuginfo()``, and the scratch file is removed afterwards even
        if resolution raises.
        """
        vprint(1,"Computing addr2line for %s" % (self.buildid))
        # Fixed ordering so that output line i corresponds to address i.
        ordered_keys = list(self.counts.keys())
        addr_path = '/tmp/' + self.filename
        with open(addr_path, 'w') as f:
            f.writelines(hex(k) + '\n' for k in ordered_keys)
        vprint(2,"Dumped addresses")
        try:
            dbginfo = self.get_debuginfo()
        finally:
            os.remove(addr_path)

        # zip() stops at the shorter input, so surplus output lines cannot
        # index past the end of the address list.
        for pc, entry in zip(ordered_keys, dbginfo.split('\n')):
            if entry == '':
                continue
            parsed = self._parse_addr2line(entry)
            if parsed is None:
                continue
            src, line_index = parsed
            # Create the per-source profile only once a usable line number is
            # known, so no empty profiles are left behind.
            if src not in self.sources:
                self.sources[src] = SourceLineProfile(self.buildid, src)
            self.sources[src].accumulate(line_index, self.counts[pc])
        vprint(2,"Mapped to %d source files" % (len(self.sources),))

    def report(self, totalhits):
        """Report every source file of this build-id against ``totalhits``."""
        for so in self.sources.values():
            so.report(totalhits)

    def get_debuginfo(self):
        """Return eu-addr2line output for the addresses in /tmp/<filename>.

        The debuginfo file is located with ``debuginfod-find``.  Any failure
        is printed and an empty string is returned, so callers simply see no
        resolvable lines.
        """
        try:
            # Get the debuginfo of the buildid retrieved from stap.
            p = subprocess.Popen(['debuginfod-find', 'debuginfo', self.buildid], stdout=subprocess.PIPE)
            dbg_file, _ = p.communicate()
            dbg_file = dbg_file.decode('utf-8').rstrip()
            if not dbg_file:
                raise Exception("No debug file for bid %s from debuginfod servers: %s" % (self.buildid, DB_URLS))
            vprint(2, "Stored debuginfod-find debuginfo file as %s" % (dbg_file))
            # Feed the address list on stdin directly instead of building a
            # shell command line from the debuginfo path.
            with open('/tmp/' + self.filename, 'r') as addrs:
                process = subprocess.Popen(['eu-addr2line', '-A', '-e', dbg_file], stdin=addrs, stdout=subprocess.PIPE)
                out, _ = process.communicate()
            return out.decode('utf-8')
        except Exception as e:
            print (e)
            return ''
165 | ||
166 | ||
167 | # Contains information related to each source of a buildid | |
class SourceLineProfile:
    """Per-line hit counts for one source file belonging to one build-id.

    Line indices stored in ``counts`` are zero-based.
    """

    class _ContextBlob:
        """A run of hit lines plus the window of context lines around them."""

        def __init__(self, lower, upper, hit):
            self.lower = lower
            self.upper = upper
            self.hits = [hit]

        def __str__(self):
            hits = ', '.join(str(i) for i in self.hits)
            if self.lower != self.upper:
                return ("Hits: " + hits + ". Context from lines %s to %s") % (self.lower, self.upper)
            return ("Hits: " + hits + ". Context of line %s") % (self.upper)

        def get_context(self):
            """Return the header line written before this blob's first line."""
            return "//" + str(self) +"\n"

    def __init__(self, bid, source):
        self.bid = bid
        self.source = source
        self.counts = defaultdict(int)  # zero-based line index -> hits

    def __str__(self):
        return "SourceLineProfile(bid %s, source %s) counts: %s" % (self.bid, self.source, self.counts.items())

    def accumulate(self, line, count):
        """Add ``count`` hits to zero-based line index ``line``."""
        self.counts[line] += count

    def get_source_file(self):
        """Fetch this source via ``debuginfod-find`` and return its local path.

        Failures are printed and ``None`` is returned.
        """
        try:
            p = subprocess.Popen(['debuginfod-find', 'source', self.bid, self.source],stdout=subprocess.PIPE)
            sourcefile, _ = p.communicate()
            sourcefile = sourcefile.decode('utf-8').rstrip()
            if not sourcefile:
                raise Exception("No source file for bid %s, source %s from debuginfod servers: %s" % (self.bid, self.source, DB_URLS))
            vprint(2, "Stored debuginfod-find source file as %s" % (sourcefile))
            return sourcefile
        except Exception as e:
            print (e)
            return None

    @staticmethod
    def _sanitize_outfile(outfile):
        """Neutralise ``..`` in ``outfile``.

        Returns ``(safe_path, escaped)``.  ``escaped`` is True when following
        the original path would have climbed above its first component (the
        profile-<buildid> directory).  ``safe_path`` has every "/.." rewritten
        to "/dotdot" so it can no longer leave that directory.
        """
        # Start at -1 so the leading profile-<buildid> component brings the
        # depth to 0, the intended root.
        depth = -1
        escaped = False
        for word in outfile.split('/'):
            depth += -1 if word == ".." else 1
            if depth < 0:
                escaped = True
                break
        return re.sub(r"/\.\.", "/dotdot", outfile), escaped

    @classmethod
    def _build_blobs(cls, hitlines, context_width, last_line):
        """Group sorted hit lines into context windows.

        ``context_width`` below zero means unlimited context (one window
        spanning the whole file).  ``last_line`` is the index of the final
        line of the file; the last window is clipped to it.  Returns a list
        of ``_ContextBlob`` in file order, empty when there are no hits.
        """
        if not hitlines:
            return []
        first = hitlines[0]
        if context_width < 0:
            width = sys.maxsize
            # A lower bound of -1 never equals a line index, so no context
            # header is written in unlimited mode.
            blobs = [cls._ContextBlob(-1, sys.maxsize, first)]
        else:
            width = int(context_width)
            # Clip to the start of the file so the header of the first window
            # is still emitted when the window reaches before line 0.
            blobs = [cls._ContextBlob(max(0, first - width), first + width, first)]
        for hit in hitlines[1:]:
            lower = hit - width
            upper = hit + width
            current = blobs[-1]
            # - 1 to connect blobs bordering one another
            if current.upper >= lower - 1:
                current.upper = upper
                current.hits.append(hit)
            else:
                blobs.append(cls._ContextBlob(lower, upper, hit))
        blobs[-1].upper = min(blobs[-1].upper, last_line)
        return blobs

    def report(self, totalhits):
        """Write the annotated source for this file.

        Every hit line is prefixed with its hit count.  Other lines are
        emitted as context according to the global ``args.context_width``.
        Output goes to profile-<bid>/<path>, or to stdout when ``args.print``
        is set.  ``totalhits`` is the overall hit count used for the
        percentage in the summary line.
        """
        filehits = sum(self.counts.values())
        percent = filehits/totalhits*100 if totalhits else 0.0
        if self.source == '??' or self.source == '':
            vprint(0,"%08d (%.2f%%) hits in buildid %s with unknown source" % (filehits, percent,
                                                                              self.bid))
            return
        if not self.counts:
            # Nothing to annotate.
            return
        # Retrieve the sourcefile's name
        sourcefile = self.get_source_file()
        if not sourcefile:
            return

        # The last component of the fetched path has '##' turned back into
        # '/' to form the path below profile-<bid>.
        outfile = os.path.join('profile-'+self.bid, (sourcefile.split('/')[-1]).replace('##','/'))

        if not args.print:
            raw_outfile = outfile
            outfile, escaped = self._sanitize_outfile(raw_outfile)
            if escaped:
                print(raw_outfile + " descends beyond its intended root directory, profile-"+self.bid+".\nEnsuring the directory remains above profile-"+self.bid+" ... ")
            try:
                os.makedirs(os.path.dirname(outfile), exist_ok=True)
            except OSError as e:
                # Without the directory the output file cannot be written.
                print(e)
                return

        vprint(0,"%07d (%.2f%%) hits in %s over %d lines." % (filehits, percent,
                                                              outfile, len(self.counts)))

        # errors='replace' keeps sources that are not valid in the default
        # encoding from aborting the whole report.
        with open(sourcefile, 'r', errors='replace') as f:
            lines = f.readlines()
        context_blobs = self._build_blobs(sorted(self.counts), args.context_width, len(lines) - 1)

        of = sys.stdout if args.print else open(outfile, 'w')
        try:
            for linenum, line in enumerate(lines):
                # If we've passed this blob's area of context, pop it
                if context_blobs and context_blobs[0].upper < linenum:
                    context_blobs.pop(0)
                if not context_blobs:
                    break
                current = context_blobs[0]
                # Reaching the start of a blob's context: emit its header.
                if linenum == current.lower:
                    of.write(current.get_context())
                if linenum in self.counts:
                    of.write("%07d %s\n" % ( self.counts[linenum], line.rstrip()))
                elif current.lower <= linenum <= current.upper:
                    of.write("%7s %s\n" % ("" , line.rstrip()))
        finally:
            if not args.print: # don't close stdout
                of.close()
303 | ||
def __main__():
    """Run the stap collector, parse its output and write the annotations.

    Raises:
        Exception: when DEBUGINFOD_URLS is unset or an option value is out
            of range.
    """
    # We require $DEBUGINFOD_URLS
    if (not DB_URLS):
        raise Exception("Required DEBUGINFOD_URLS is unset.")

    # Validate options before any temporary file exists, so that a bad
    # option cannot leave one behind.
    if args.timeout and args.timeout < 0:
        raise Exception("Timeout must be positive")
    if args.pid and args.pid < 0:
        raise Exception("pid must be positive")
    if args.context_width and args.context_width < -1:
        raise Exception("context_width must be positive or -1 (for all file)")

    (tmpfd,tmpfilename) = tempfile.mkstemp()
    # stap writes to the file by name (-o) and it is re-opened by name below,
    # so the descriptor returned by mkstemp is not needed.
    os.close(tmpfd)

    try:
        # Run SystemTap
        stap_cmd = "@prefix@/bin/stap" # not @ bindir @ because autoconf expands that to shell $var expressions
        stap_args = ['--ldd', '-o'+tmpfilename]

        if args.cmd:
            stap_args += ['-c', args.cmd]
        if args.timeout:
            stap_args += ['-T', str(args.timeout)]
        if args.pid:
            stap_args += ['-x', str(args.pid)]
        for d in args.d:
            stap_args += ['-d', d]
        if args.stap:
            stap_cmd = args.stap
        stap_args += ['-e', stap_script]

        vprint(1,"Building stap data collector.")
        vprint(2,"%s %s" % (stap_cmd, stap_args))

        p = None
        try:
            p = subprocess.Popen([stap_cmd] + stap_args)
            p.communicate() # wait until process exits
        except KeyboardInterrupt:
            pass
        if p is not None:
            p.kill()
            # Reap the child so its output file is complete before reading.
            p.wait()

        buildids = {} # dict from buildid hexcode to BuildIDProfile object

        outp_begin = False
        proflines = 0
        totalhits = 0

        with open(tmpfilename,"r") as stap_output: # read stap output, text mode
            for line in stap_output:
                line = line.rstrip()
                # All relevant output is after BEGIN and before END
                if "BEGIN" in line:
                    outp_begin = True
                elif "END" in line:
                    outp_begin = False
                elif not outp_begin:
                    if line != "": # diagnostic message
                        vprint(0,line)
                else: # an actual profile record
                    proflines += 1
                    try:
                        (buildid,pc,hits) = line.split()
                        # Convert both numbers before touching any state so a
                        # malformed record leaves nothing half-recorded.
                        pc_value = int(pc)
                        hit_count = int(hits)
                    except ValueError as e: # parse error?
                        vprint(2,e)
                        continue
                    vprint(3,"(%s,%s,%s)" % (buildid,pc,hits))
                    totalhits += hit_count
                    bidp = buildids.get(buildid)
                    if bidp is None:
                        bidp = buildids[buildid] = BuildIDProfile(buildid)
                    # Accumulate hits for offset pc
                    bidp.accumulate(pc_value, hit_count)
    finally:
        os.remove(tmpfilename)

    vprint(0, "Consumed %d profile records of %d hits across %d buildids." % (proflines, totalhits, len(buildids)))

    # Output source information for each buildid
    totalhits = sum(sum(bid.counts.values()) for bid in buildids.values())
    for bidp in buildids.values():
        bidp.get_sources()
        bidp.report(totalhits)

if __name__ == '__main__':
    __main__()