]> sourceware.org Git - systemtap.git/blame - stap-profile-annotate.in
Update nfsderrno.stp to work with Linux 6.10
[systemtap.git] / stap-profile-annotate.in
CommitLineData
2aaf9213
NS
1#!/usr/bin/python3
2
3# This script uses tapset/hit-count.stp to profile a specific process
4# or the kernel. It may take a context width, module path, pid, cmd, and timeout.
5# It generates folders based on buildid, containing subdirectories
6# leading to sourcefiles where one may read how many times the pc
7# was at a certain line in that sourcefile.
8
9
10import argparse
11import sys
12import os
13import re
14import subprocess
15import tempfile
16from collections import defaultdict
17
# Command-line interface.  -x/--pid and -c/--cmd select the profiling target
# and are mutually exclusive; every other option is independent.
parser = argparse.ArgumentParser()
pid_cmd_group = parser.add_mutually_exclusive_group()
pid_cmd_group.add_argument(
    "-x", "--pid",
    type=int,
    help='PID for systemtap to target.',
)
pid_cmd_group.add_argument(
    "-c", "--cmd",
    type=str,
    help='Command for systemtap to target.',
)
parser.add_argument(
    '-d',
    metavar="BINARY",
    type=str,
    action='append',
    default=[],
    help='Add symbol information for given binary and its shared libraries.',
)
parser.add_argument(
    "-e", "--events",
    type=str,
    default='timer.profile',
    help='Override the list of profiling probe points.',
)
parser.add_argument(
    "-T", "--timeout",
    type=int,
    help="Exit in 'timeout' seconds.",
)
parser.add_argument(
    "-p", "--print",
    action='store_true',
    help="Print annotated source files to stdout instead of files.",
)
parser.add_argument(
    "-w", "--context-width",
    metavar="WIDTH",
    type=int,
    default=-1,
    help='Limit number of lines of context around each hit. Defaults to unlimited.',
)
parser.add_argument(
    "-s", "--stap",
    metavar="PATH",
    type=str,
    help='Override the path to the stap interpreter.',
)
parser.add_argument(
    "-v", "--verbose",
    action='count',
    default=0,
    help="Increase verbosity.",
)

# Parsed options, shared as module globals by the rest of the script.
args = parser.parse_args()
verbosity = args.verbose
# Value of $DEBUGINFOD_URLS (None when unset); __main__() requires it.
DB_URLS = os.environ.get("DEBUGINFOD_URLS")
2aaf9213
NS
33
def vprint(level, *args):
    """Print *args (as print() would) when the global verbosity is >= level."""
    if verbosity < level:
        return
    print(*args)
37
38
# SystemTap collector script handed to `stap -e`.  The user-selected probe
# points (args.events) are spliced in between the two literal halves.
# Once a second (and at exit) it prints a BEGIN ... END framed batch of
# "buildid addr count" records and then clears the `count` array; at
# end/error it prints summary counters.
stap_script = "".join((
    """
global count
global unknown
global kernel
global user
probe begin {
 system(\"echo Starting stap data collector.\") # sent to stdout of stap-profile-annotate process
}
probe """,
    args.events,
    """ {
 if (! user_mode()) {
 kernel <<< 1
 next
 }
 try {
 if (target()==0 || target_set_pid(pid()))
 {
 buildid = umodbuildid(uaddr());
 addr= umodaddr(uaddr());
 count[buildid,addr] <<< 1;
 user <<< 1
 }
 }
 catch /*(e)*/ { unknown <<< 1 /* printf ("%s", e) */ }
}

probe timer.s(1),end
{
 println (\"BEGIN\");
 foreach ( [buildid, addr] in count)
 {
 c = @count(count[buildid,addr]);
 println(buildid, " " , addr, " ", c);
 }
 println (\"END\");
 delete count
}
probe end,error
{
 printf (\"Counted %d known userspace hits.\\n\", @count(user))
 if (@count(kernel))
 printf (\"Ignored %d kernel hits.\\n\", @count(kernel))
 if (@count(unknown))
 printf (\"Ignored %d unknown userspace hits.\\n\", @count(unknown))
 println(\"Stopped stap data collector.\")
}
""",
))
85
86# buildid class
class BuildIDProfile:
    """Hit counts for one build-id, keyed by module-relative address.

    Attributes:
        counts:   address (int) -> accumulated hit count.
        buildid:  build-id string as printed by the stap collector.
        filename: basename of the scratch file (under /tmp/) that holds the
                  addresses fed to eu-addr2line.
        sources:  source path -> SourceLineProfile, filled by get_sources().
    """

    def __init__(self, buildid):
        self.counts = defaultdict(int)
        self.buildid = buildid
        self.filename = self.buildid + 'addrs.txt'
        self.sources = {}

    def __str__(self):
        return "BuildIDProfile(buildid %s) items: %s sources: %s" % (self.buildid, self.counts.items(), self.sources.items())

    # Build the 'counts' dict by adding the hit count to its associated address
    def accumulate(self, pc, count):
        """Add `count` hits to relative address `pc`."""
        self.counts[pc] += count

    # Find the sources of the relative addresses in self.counts.keys()
    def get_sources(self):
        """Resolve every counted address to source:line and fill self.sources.

        Addresses are dumped one per line to /tmp/<self.filename>, resolved
        through get_debuginfo(), and the scratch file is removed afterwards
        even when resolution fails.
        """
        vprint(1, "Computing addr2line for %s" % (self.buildid))
        # Used to maintain order of writing: the n-th output line of
        # eu-addr2line answers the n-th address written below.
        ordered_keys = list(self.counts.keys())
        addr_path = '/tmp/' + self.filename
        with open(addr_path, 'w') as f:
            for k in ordered_keys:
                f.write(hex(k) + '\n')
        vprint(2, "Dumped addresses")
        try:
            # Get source:linenum info ('' when it could not be obtained)
            dbginfo = self.get_debuginfo()
        finally:
            # Remove the scratch file whatever happened above
            os.remove(addr_path)

        for addr, line in zip(ordered_keys, dbginfo.split('\n')):
            if line == '':
                continue
            fields = line.split(':')
            if len(fields) < 2:
                # No "source:line" shape at all; nothing to attribute.
                continue
            src = fields[0]
            # Sometimes addr2line responds with a string of the format
            # ("linenum" discriminator "num"); keep only the leading number.
            m = re.search('[0-9]+', fields[1])
            # No number means no usable line, so don't accumulate it
            if m is None:
                continue
            line_number = int(m.group(0))
            # Only register a source once a hit can actually be recorded,
            # so report() never sees a profile with empty counts.
            if src not in self.sources:
                self.sources[src] = SourceLineProfile(self.buildid, src)
            # eu-addr2line numbers lines from 1, whereas
            # SourceLineProfile.report numbers them from 0, so shift by one.
            self.sources[src].accumulate(line_number - 1, self.counts[addr])
        vprint(2, "Mapped to %d source files" % (len(self.sources),))

    # Report information for this buildid's source files
    def report(self, totalhits):
        """Report every source file of this build-id against `totalhits`."""
        for so in self.sources.values():
            so.report(totalhits)

    # Get source:linenum information. Assumes /tmp/<self.filename> has
    # relative address information
    def get_debuginfo(self):
        """Return eu-addr2line output for the dumped addresses.

        Fetches the debuginfo file with `debuginfod-find debuginfo` and feeds
        the address file to `eu-addr2line -A -e <debuginfo>` on stdin.
        Returns the decoded output, or '' (after printing the error) when the
        debuginfo cannot be found or a tool cannot be run.
        """
        try:
            # Get the debuginfo of the buildid retrieved from stap
            p = subprocess.Popen(['debuginfod-find', 'debuginfo', self.buildid], stdout=subprocess.PIPE)
            dbg_file, _ = p.communicate()
            dbg_file = dbg_file.decode('utf-8').rstrip()
            if dbg_file == '':
                raise Exception("No debug file for bid %s from debuginfod servers: %s" % (self.buildid, DB_URLS))
            vprint(2, "Stored debuginfod-find debuginfo file as %s" % (dbg_file))
            # Use the debuginfo attained from the above process.  The address
            # file is passed as stdin directly (no shell), so unusual
            # characters in the cached path cannot be misinterpreted.
            with open('/tmp/' + self.filename, 'r') as addrs:
                process = subprocess.Popen(['eu-addr2line', '-A', '-e', dbg_file], stdin=addrs, stdout=subprocess.PIPE)
                out, _ = process.communicate()
            return out.decode('utf-8')
        except Exception as e:
            print(e)
            return ''
165
166
167# Contains information related to each source of a buildid
class SourceLineProfile:
    """Hit counts per (0-based) line of one source file of one build-id.

    Attributes:
        bid:    build-id string the source belongs to.
        source: source path as reported by eu-addr2line ('??' or '' if unknown).
        counts: 0-based line number -> accumulated hit count.
    """

    class _ContextBlob:
        """A run of lines [lower, upper] shown around one or more hit lines."""

        def __init__(self, lower, upper, hit):
            self.lower = lower
            self.upper = upper
            self.hits = [hit]

        def __str__(self):
            hits = ', '.join(str(i) for i in self.hits)
            if self.lower != self.upper:
                return "Hits: %s. Context from lines %s to %s" % (hits, self.lower, self.upper)
            return "Hits: %s. Context of line %s" % (hits, self.upper)

        def get_context(self):
            """Return the '//' header line that introduces this blob."""
            return "//" + str(self) + "\n"

    def __init__(self, bid, source):
        self.bid = bid
        self.source = source
        self.counts = defaultdict(int)

    def __str__(self):
        return "SourceLineProfile(bid %s, source %s) counts: %s" % (self.bid, self.source, self.counts.items())

    # Accumulate hits on a line
    def accumulate(self, line, count):
        """Add `count` hits to 0-based line number `line`."""
        self.counts[line] += count

    # Get the source file associated with a buildid
    def get_source_file(self):
        """Return the local path printed by `debuginfod-find source`.

        Returns None (after printing the error) when the tool cannot be run
        or prints no path.
        """
        try:
            p = subprocess.Popen(['debuginfod-find', 'source', self.bid, self.source], stdout=subprocess.PIPE)
            sourcefile, _ = p.communicate()
            sourcefile = sourcefile.decode('utf-8').rstrip()
            if sourcefile == '':
                raise Exception("No source file for bid %s, source %s from debuginfod servers: %s" % (self.bid, self.source, DB_URLS))
            vprint(2, "Stored debuginfod-find source file as %s" % (sourcefile))
            return sourcefile
        except Exception as e:
            print(e)
            return None

    # Keep the output path inside its profile-<buildid> directory
    def _sanitize_outfile(self, outfile):
        """Return `outfile` with every '/..' rewritten to '/dotdot'.

        Prints a warning when the '..' components would climb out of the
        leading profile-<buildid> directory.
        """
        # Starts at -1 so that the profile-<buildid> component itself brings
        # the depth to 0; a negative depth afterwards means we climbed out.
        depth = -1
        for word in outfile.split('/'):
            depth += -1 if word == ".." else 1
            if depth < 0:
                print(outfile + " descends beyond its intended root directory, profile-"+self.bid+".\nEnsuring the directory remains above profile-"+self.bid+" ... ")
                break
        return re.sub(r"/\.\.", "/dotdot", outfile)

    # Build the annotated listing for the lines of one source file
    def _annotate(self, source_lines, context_width):
        """Return the annotated output lines for `source_lines`.

        Hit lines are prefixed with their zero-padded hit count; other lines
        inside a context blob are prefixed with blanks.  `context_width` is
        the number of context lines kept on each side of a hit; a negative
        value keeps the whole file.  With a finite width each blob is
        introduced by a '//Hits: ...' header line.
        """
        if not self.counts:
            return []
        hitlines = sorted(self.counts)
        last_line = len(source_lines) - 1
        blob = self._ContextBlob

        if context_width < 0:
            width = sys.maxsize
            # lower == -1 never matches a line number: no header when the
            # whole file is shown.
            blobs = [blob(-1, sys.maxsize, hitlines[0])]
        else:
            width = int(context_width)
            # Clamp to line 0 so the header is still emitted when the first
            # hit is closer than `width` lines to the top of the file.
            blobs = [blob(max(0, hitlines[0] - width), hitlines[0] + width, hitlines[0])]

        for i in hitlines[1:]:
            lower = i - width
            upper = i + width
            # - 1 to connect blobs bordering one another
            if blobs[-1].upper >= lower - 1:
                blobs[-1].upper = upper
                blobs[-1].hits.append(i)
            else:
                blobs.append(blob(lower, upper, i))
        blobs[-1].upper = min(blobs[-1].upper, last_line)

        annotated = []
        for linenum, line in enumerate(source_lines):
            # If we've passed this blob's area of context, drop it
            if blobs[0].upper < linenum:
                blobs.pop(0)
                if not blobs:
                    break
            current = blobs[0]
            # Reached the beginning of a blob's context: emit its header
            if linenum == current.lower:
                annotated.append(current.get_context())
            if linenum in current.hits:
                annotated.append("%07d %s\n" % (self.counts[linenum], line.rstrip()))
            elif current.lower <= linenum <= current.upper:
                annotated.append("%7s %s\n" % ("", line.rstrip()))
        return annotated

    # Reporting function for the source file
    def report(self, totalhits):
        """Print a summary line and emit the annotated source.

        The annotated listing goes to stdout when --print was given,
        otherwise to profile-<buildid>/<path>, creating directories as
        needed.  `totalhits` is the hit total across all build-ids and is
        used for the percentage in the summary line.
        """
        filehits = sum(self.counts.values())
        if self.source == '??' or self.source == '':
            vprint(0, "%08d (%.2f%%) hits in buildid %s with unknown source" % (filehits, filehits/totalhits*100,
                                                                                self.bid))
            return
        if not self.counts:
            # Nothing was attributed to this file; there is nothing to annotate.
            return
        # Retrieve the sourcefile's name
        sourcefile = self.get_source_file()
        if not sourcefile:
            return

        outfile = os.path.join('profile-'+self.bid, (sourcefile.split('/')[-1]).replace('##', '/'))

        # Try creating the appropriate directory
        if not args.print:
            outfile = self._sanitize_outfile(outfile)
            try:
                os.makedirs(os.path.dirname(outfile), exist_ok=True)
            except OSError as e:
                print(e)

        vprint(0, "%07d (%.2f%%) hits in %s over %d lines." % (filehits, filehits/totalhits*100,
                                                               outfile, len(self.counts)))

        with open(sourcefile, 'r') as f:
            source_lines = f.readlines()
        annotated = self._annotate(source_lines, args.context_width)

        if args.print:
            sys.stdout.writelines(annotated)
            return
        try:
            with open(outfile, 'w') as of:
                of.writelines(annotated)
        except OSError as e:
            print(e)
303
def __main__():
    """Run the stap collector, parse its records and report per build-id.

    Raises:
        Exception: when DEBUGINFOD_URLS is unset, or when --timeout, --pid
            or --context-width hold out-of-range values.
    """
    # We require $DEBUGINFOD_URLS
    if not DB_URLS:
        raise Exception("Required DEBUGINFOD_URLS is unset.")

    # Validate options before creating anything that needs cleaning up
    if args.timeout and args.timeout < 0:
        raise Exception("Timeout must be positive")
    if args.pid and args.pid < 0:
        raise Exception("pid must be positive")
    if args.context_width < -1:
        raise Exception("context_width must be positive or -1 (for all file)")

    stap_cmd = "@prefix@/bin/stap" # not @ bindir @ because autoconf expands that to shell $var expressions
    if args.stap:
        stap_cmd = args.stap

    buildids = {} # dict from buildid hexcode to BuildIDProfile object
    proflines = 0
    totalhits = 0

    (tmpfd, tmpfilename) = tempfile.mkstemp()
    # stap writes through the path given to -o, so our descriptor is unused.
    os.close(tmpfd)
    try:
        # Run SystemTap
        stap_args = ['--ldd', '-o'+tmpfilename]
        if args.cmd:
            stap_args += ['-c', args.cmd]
        if args.timeout:
            stap_args += ['-T', str(args.timeout)]
        if args.pid:
            stap_args += ['-x', str(args.pid)]
        for d in args.d:
            stap_args += ['-d', d]
        stap_args += ['-e', stap_script]

        vprint(1, "Building stap data collector.")
        vprint(2, "%s %s" % (stap_cmd, stap_args))

        p = None
        try:
            p = subprocess.Popen([stap_cmd] + stap_args)
            p.communicate() # wait until process exits
        except KeyboardInterrupt:
            pass
        if p is not None:
            p.kill()
            p.wait() # reap the child so it does not linger as a zombie

        outp_begin = False
        with open(tmpfilename, "r") as stap_output: # read stap output, text mode
            for line in stap_output:
                line = line.rstrip()
                # All relevant output is after BEGIN and before END
                if "BEGIN" in line:
                    outp_begin = True
                elif "END" in line:
                    outp_begin = False
                elif not outp_begin:
                    if line != "": # diagnostic message
                        vprint(0, line)
                else: # an actual profile record
                    try:
                        proflines += 1
                        (buildid, pc, hits) = line.split()
                        vprint(3, "(%s,%s,%s)" % (buildid, pc, hits))
                        totalhits += int(hits)
                        bidp = buildids.get(buildid)
                        if bidp is None:
                            bidp = buildids[buildid] = BuildIDProfile(buildid)
                        # Accumulate hits for offset pc
                        bidp.accumulate(int(pc), int(hits))
                    except Exception as e: # parse error?
                        vprint(2, e)
    finally:
        os.remove(tmpfilename)

    vprint(0, "Consumed %d profile records of %d hits across %d buildids." % (proflines, totalhits, len(buildids)))

    # Output source information for each buildid.  The total is recomputed
    # from what was actually accumulated, which can be lower than the
    # running total when a record failed to parse part-way.
    totalhits = sum(sum(bid.counts.values()) for bid in buildids.values())
    for bidp in buildids.values():
        bidp.get_sources()
        bidp.report(totalhits)


if __name__ == '__main__':
    __main__()
This page took 0.07258 seconds and 5 git commands to generate.