[systemtap.git] / stap-profile-annotate.in

#!/usr/bin/python3

# This script uses tapset/hit-count.stp to profile a specific process
# or the kernel. It may take a context width, module path, pid, cmd, and timeout.
# It generates folders based on buildid, containing subdirectories
# leading to sourcefiles where one may read how many times the pc
# was at a certain line in that sourcefile.


import argparse
import sys
import os
import re
import subprocess
import tempfile
from collections import defaultdict

# Command-line interface.  -x/--pid and -c/--cmd are mutually exclusive.
parser = argparse.ArgumentParser()
pid_cmd_group = parser.add_mutually_exclusive_group()
pid_cmd_group.add_argument(
    "-x", "--pid",
    type=int,
    help='PID for systemtap to target.',
)
pid_cmd_group.add_argument(
    "-c", "--cmd",
    type=str,
    help='Command for systemtap to target.',
)
parser.add_argument(
    '-d',
    metavar="BINARY",
    type=str,
    action='append',
    default=[],
    help='Add symbol information for given binary and its shared libraries.',
)
parser.add_argument(
    "-e", "--events",
    type=str,
    default='timer.profile',
    help='Override the list of profiling probe points.',
)
parser.add_argument(
    "-T", "--timeout",
    type=int,
    help="Exit in 'timeout' seconds.",
)
parser.add_argument(
    "-p", "--print",
    action='store_true',
    help="Print annotated source files to stdout instead of files.",
)
parser.add_argument(
    "-w", "--context-width",
    metavar="WIDTH",
    type=int,
    default=-1,
    help='Limit number of lines of context around each hit.  Defaults to unlimited.',
)
parser.add_argument(
    "-s", "--stap",
    metavar="PATH",
    type=str,
    help='Override the path to the stap interpreter.',
)
parser.add_argument(
    "-v", "--verbose",
    action='count',
    default=0,
    help="Increase verbosity.",
)

# Module-wide settings read by the rest of the script.
args = parser.parse_args()
verbosity = args.verbose
DB_URLS = os.environ.get("DEBUGINFOD_URLS")

def vprint(level,*args):
    """Print *args* only when the global verbosity is at least *level*."""
    if verbosity < level:
        return
    print(*args)


# SystemTap script handed to stap with -e.  args.events is spliced in as the
# probe point list of the sampling probe.
#
# Sampling probe: a hit outside user mode is tallied in 'kernel' and skipped.
# Otherwise, when no target is set or the current pid is in the target set,
# the hit is recorded in count[buildid, addr] (from umodbuildid/umodaddr of
# uaddr()) and tallied in 'user'; an error inside the try block is tallied in
# 'unknown' instead.
#
# Output protocol (parsed by __main__): once per second and at the end, the
# script prints a line "BEGIN", one "buildid addr hits" line per entry of
# 'count', and a line "END", then clears 'count'.  On end/error it prints the
# summary lines ("Counted ...", "Ignored ...", "Stopped stap data collector.").
stap_script="""
global count
global unknown
global kernel
global user
probe begin {
  system(\"echo Starting stap data collector.\") # sent to stdout of stap-profile-annotate process
}
probe """ + args.events + """ {
  if (! user_mode()) {
    kernel <<< 1
    next
  }
  try {
    if (target()==0 || target_set_pid(pid()))
      {
        buildid = umodbuildid(uaddr());
        addr= umodaddr(uaddr());
        count[buildid,addr] <<< 1;
        user <<< 1
      }
  }
  catch /*(e)*/ { unknown <<< 1 /* printf ("%s", e) */ }
}

probe timer.s(1),end
{
  println (\"BEGIN\");
  foreach ( [buildid, addr] in count)
    {
      c = @count(count[buildid,addr]);
      println(buildid, " " , addr, " ", c);
    }
  println (\"END\");
  delete count
}
probe end,error
{
  printf (\"Counted %d known userspace hits.\\n\", @count(user))
  if (@count(kernel))
    printf (\"Ignored %d kernel hits.\\n\", @count(kernel))
  if (@count(unknown))
    printf (\"Ignored %d unknown userspace hits.\\n\", @count(unknown))
  println(\"Stopped stap data collector.\")
}
"""

# buildid class
class BuildIDProfile:
    """Hit counts for one build-id and their mapping onto source lines.

    Attributes:
        counts:   dict from relative address (int) to accumulated hit count.
        buildid:  build-id string as printed by the stap script.
        filename: base name of the scratch address file created under /tmp/.
        sources:  dict from source path to its SourceLineProfile.
    """

    def __init__(self,buildid):
        self.counts = defaultdict(int)
        self.buildid = buildid
        self.filename = self.buildid + 'addrs.txt'
        self.sources = {}

    def __str__(self):
        return "BuildIDProfile(buildid %s) items: %s sources: %s" % (self.buildid, self.counts.items(), self.sources.items())

    # Build the 'counts' dict by adding the hit count to its associated address
    def accumulate(self,pc,count):
        """Add *count* hits to the relative address *pc*."""
        self.counts[pc] += count

    @staticmethod
    def _parse_location(text):
        """Parse one eu-addr2line output line.

        Returns a (source, line) tuple, with the line number converted to the
        0-based numbering used by SourceLineProfile.report, or None when
        *text* carries no "source:line" information.
        """
        fields = text.split(':')
        if len(fields) < 2:
            return None
        # Sometimes addr2line responds with a string of the format
        # ("linenum" discriminator "num"); keep only the leading number.
        m = re.search('[0-9]+', fields[1])
        if m is None:
            return None
        # eu-addr2line numbers lines from 1, SourceLineProfile.report from 0.
        return fields[0], int(m.group(0)) - 1

    # Find the sources of relative addresses from self.counts.keys()
    def get_sources(self):
        """Resolve every sampled address and fill self.sources with the hits."""
        vprint(1,"Computing addr2line for %s" % (self.buildid))
        # The answers are matched to addresses by position, so remember the
        # order in which the addresses are written.
        ordered_keys = list(self.counts.keys())
        # NOTE(review): fixed, predictable name in /tmp/; kept because
        # get_debuginfo() locates the file through self.filename.
        addr_path = '/tmp/' + self.filename
        try:
            with open(addr_path, 'w') as f:
                f.writelines(hex(k) + '\n' for k in ordered_keys)
            vprint(2,"Dumped addresses")
            # Get source:linenum info
            dbginfo = self.get_debuginfo()
        finally:
            # Remove the scratch file even when the lookup failed.
            if os.path.exists(addr_path):
                os.remove(addr_path)

        for pc, text in zip(ordered_keys, dbginfo.split('\n')):
            location = self._parse_location(text)
            if location is None:
                continue
            src, line_number = location
            if src not in self.sources:
                self.sources[src] = SourceLineProfile(self.buildid, src)
            self.sources[src].accumulate(line_number, self.counts[pc])
        vprint(2,"Mapped to %d source files" % (len(self.sources),))

    # Report information for this buildid's source files
    def report(self,totalhits):
        """Report every source file of this build-id against *totalhits*."""
        for so in self.sources.values():
            so.report(totalhits)

    # Get source:linenum information. Assumes self.filename has relative address information
    def get_debuginfo(self):
        """Return the eu-addr2line output for the addresses in the scratch file.

        The debuginfo file is fetched with debuginfod-find.  On any failure
        the problem is printed and an empty string is returned, so the caller
        simply maps no addresses for this build-id.
        """
        try:
            # Get the debuginfo of the buildid retrieved from stap
            found = subprocess.run(['debuginfod-find', 'debuginfo', self.buildid],
                                   stdout=subprocess.PIPE)
            dbg_file = found.stdout.decode('utf-8', errors='replace').rstrip()
            if found.returncode != 0 or dbg_file == '':
                raise RuntimeError("No debug file for bid %s from debuginfod servers: %s" % (self.buildid, DB_URLS))
            vprint(2, "Stored debuginfod-find debuginfo file as %s" % (dbg_file))
            # Feed the address file on stdin; no shell is involved, so the
            # debuginfo path needs no quoting.
            with open('/tmp/' + self.filename, 'rb') as addrs:
                resolved = subprocess.run(['eu-addr2line', '-A', '-e', dbg_file],
                                          stdin=addrs, stdout=subprocess.PIPE)
            return resolved.stdout.decode('utf-8', errors='replace')
        except (OSError, RuntimeError) as e:
            print (e)
            return ''


# Contains information related to each source of a buildid
class SourceLineProfile:
    """Per-line hit counts for one source file of one build-id.

    Attributes:
        bid:    build-id the source file belongs to.
        source: source path as reported by eu-addr2line.
        counts: dict from 0-based line number to accumulated hit count.
    """

    class _ContextBlob:
        """A run of lines [lower, upper] printed around one or more hit lines."""

        def __init__(self, lower, upper, hit):
            self.lower = lower
            self.upper = upper
            self.hits = [hit]

        def __str__(self):
            hits = ', '.join(str(i) for i in self.hits)
            if self.lower != self.upper:
                return "Hits: %s. Context from lines %s to %s" % (hits, self.lower, self.upper)
            return "Hits: %s. Context of line %s" % (hits, self.upper)

        def get_context(self):
            """Return the header line written before the blob's first line."""
            return "//" + str(self) + "\n"

    def __init__(self,  bid, source):
        self.bid = bid
        self.source = source
        self.counts = defaultdict(int)

    def __str__(self):
        return "SourceLineProfile(bid %s, source %s) counts: %s" % (self.bid, self.source, self.counts.items())

    # Accumulate hits on a line
    def accumulate(self, line, count):
        """Add *count* hits to the 0-based line number *line*."""
        self.counts[line] += count

    # Get the source file associated with a buildid
    def get_source_file(self):
        """Fetch the source file with debuginfod-find.

        Returns the local path printed by debuginfod-find, or None (after
        printing the reason) when the file could not be obtained.
        """
        try:
            found = subprocess.run(['debuginfod-find', 'source', self.bid, self.source],
                                   stdout=subprocess.PIPE)
        except OSError as e:
            print (e)
            return None
        sourcefile = found.stdout.decode('utf-8', errors='replace').rstrip()
        if found.returncode != 0 or sourcefile == '':
            print ("No source file for bid %s, source %s from debuginfod servers: %s" % (self.bid, self.source, DB_URLS))
            return None
        vprint(2, "Stored debuginfod-find source file as %s" % (sourcefile))
        return sourcefile

    def _output_path(self, sourcefile):
        """Map the fetched file's name to a path below profile-<buildid>/.

        '##' in the file name stands for '/'.  Empty and '.' components are
        dropped and '..' components are renamed to 'dotdot', so the result can
        never leave the profile-<buildid> directory.
        """
        relative = (sourcefile.split('/')[-1]).replace('##', '/')
        parts = []
        for part in relative.split('/'):
            if part in ('', '.'):
                continue
            parts.append('dotdot' if part == '..' else part)
        return os.path.join('profile-' + self.bid, *parts)

    @classmethod
    def _build_blobs(cls, hitlines, context_width, last_line):
        """Group sorted 0-based *hitlines* into context blobs.

        A negative *context_width* means unlimited context: one blob spanning
        the whole file, whose lower bound of -1 keeps its header from being
        written.  Otherwise each hit contributes *context_width* lines on
        either side, and blobs that overlap or border one another are merged.
        The last blob is clipped to *last_line*, the index of the final line.
        """
        unlimited = context_width < 0
        blobs = []
        for hit in hitlines:
            if unlimited:
                lower, upper = -1, last_line
            else:
                lower, upper = max(0, hit - context_width), hit + context_width
            # - 1 to connect blobs bordering one another
            if blobs and blobs[-1].upper >= lower - 1:
                blobs[-1].upper = max(blobs[-1].upper, upper)
                blobs[-1].hits.append(hit)
            else:
                blobs.append(cls._ContextBlob(lower, upper, hit))
        if blobs and blobs[-1].upper > last_line:
            blobs[-1].upper = last_line
        return blobs

    def _write_annotated(self, lines, blobs, out):
        """Write the lines covered by *blobs* to *out*, prefixing hit counts."""
        current = 0
        for linenum, line in enumerate(lines):
            # Skip every blob whose context lies entirely behind this line.
            while current < len(blobs) and blobs[current].upper < linenum:
                current += 1
            if current == len(blobs):
                break
            blob = blobs[current]
            # At the beginning of a blob's context, write its header.
            if linenum == blob.lower:
                out.write(blob.get_context())
            if linenum in self.counts:
                out.write("%07d %s\n" % (self.counts[linenum], line.rstrip()))
            elif blob.lower <= linenum <= blob.upper:
                out.write("%7s %s\n" % ("", line.rstrip()))

    # Reporting function for the source file
    def report(self, totalhits):
        """Print the hit summary for this file and emit its annotated source.

        The annotated source goes to stdout when --print was given, otherwise
        to a file below profile-<buildid>/.  *totalhits* is the number of hits
        the percentage is computed against.
        """
        filehits = sum(self.counts.values())
        if self.source == '??' or self.source == '':
            vprint(0,"%08d (%.2f%%) hits in buildid %s with unknown source" % (filehits, filehits/totalhits*100,
                                                                               self.bid))
            return
        if not self.counts:
            # Nothing was accumulated for this file, so there is nothing to annotate.
            return
        # Retrieve the sourcefile's name
        sourcefile = self.get_source_file()
        if not sourcefile:
            return

        outfile = self._output_path(sourcefile)
        vprint(0,"%07d (%.2f%%) hits in %s over %d lines." % (filehits, filehits/totalhits*100,
                                                             outfile, len(self.counts)))

        # Undecodable bytes are replaced rather than aborting the report.
        with open(sourcefile, 'r', errors='replace') as f:
            lines = f.readlines()
        blobs = self._build_blobs(sorted(self.counts), args.context_width, len(lines) - 1)

        if args.print:
            self._write_annotated(lines, blobs, sys.stdout)
            return
        try:
            os.makedirs(os.path.dirname(outfile), exist_ok=True)
            with open(outfile, 'w') as of:
                self._write_annotated(lines, blobs, of)
        except OSError as e:
            print(e)

def __main__():
    """Run the stap data collector, parse its output and report the profile.

    Raises:
        Exception: when DEBUGINFOD_URLS is unset or a numeric option is out
            of range.
    """
    # We require $DEBUGINFOD_URLS
    if (not DB_URLS):
        raise Exception("Required DEBUGINFOD_URLS is unset.")

    # Validate the options before any temporary file is created.
    if args.timeout and args.timeout < 0:
        raise Exception("Timeout must be positive")
    if args.pid and args.pid < 0:
        raise Exception("pid must be positive")
    if args.context_width and args.context_width < -1:
        raise Exception("context_width must be positive or -1 (for all file)")

    stap_cmd = "@prefix@/bin/stap"  # not @ bindir @ because autoconf expands that to shell $var expressions
    if args.stap:
        stap_cmd = args.stap

    buildids = {} # dict from buildid hexcode to BuildIdProfile object
    proflines = 0
    totalhits = 0

    # Run SystemTap
    (tmpfd,tmpfilename) = tempfile.mkstemp()
    os.close(tmpfd) # stap writes to the file by name; the descriptor is not needed
    try:
        stap_args = ['--ldd', '-o'+tmpfilename]
        if args.cmd:
            stap_args += ['-c', args.cmd]
        if args.timeout:
            stap_args += ['-T', str(args.timeout)]
        if args.pid:
            stap_args += ['-x', str(args.pid)]
        for d in args.d:
            stap_args += ['-d', d]
        stap_args += ['-e', stap_script]

        vprint(1,"Building stap data collector.")
        vprint(2,"%s %s" % (stap_cmd, stap_args))

        p = subprocess.Popen([stap_cmd] + stap_args)
        try:
            p.communicate() # wait until process exits
        except KeyboardInterrupt:
            # An interactive Ctrl-C normally reaches stap as well; give it a
            # chance to run its end probes and flush its output before
            # resorting to kill().
            try:
                p.wait(timeout=10)
            except (subprocess.TimeoutExpired, KeyboardInterrupt):
                pass
        if p.poll() is None:
            p.kill()
            p.wait()

        outp_begin = False
        with open(tmpfilename,"r") as stap_output: # read stap output, text mode
            for line in stap_output:
                line = line.rstrip()
                # All relevant output is after BEGIN and before END
                if "BEGIN" in line:
                    outp_begin = True
                elif "END" in line:
                    outp_begin = False
                elif not outp_begin:
                    if line != "": # diagnostic message
                        vprint(0,line)
                else: # an actual profile record
                    proflines += 1
                    try:
                        (buildid,pc,hits) = line.split()
                        # Convert both numbers before touching any state, so a
                        # malformed record is ignored as a whole.
                        pc = int(pc)
                        hits = int(hits)
                    except ValueError as e: # parse error?
                        vprint(2,e)
                        continue
                    vprint(3,"(%s,%s,%s)" % (buildid,pc,hits))
                    totalhits += hits
                    bidp = buildids.get(buildid)
                    if bidp is None:
                        bidp = buildids[buildid] = BuildIDProfile(buildid)
                    # Accumulate hits for offset pc
                    bidp.accumulate(pc,hits)
    finally:
        os.remove(tmpfilename)

    vprint(0, "Consumed %d profile records of %d hits across %d buildids." % (proflines, totalhits, len(buildids)))

    # Output source information for each buildid
    for buildid, bidp in buildids.items():
        bidp.get_sources()
        bidp.report(totalhits)

if __name__ == '__main__':
    __main__()
Commit	Line	Data
2aaf9213 NS	1	#!/usr/bin/python3
	2
	3	# This script uses tapset/hit-count.stp to profile a specific process
	4	# or the kernel. It may take a context width, module path, pid, cmd, and timeout.
	5	# It generates folders based on buildid, containing subdirectories
	6	# leading to sourcefiles where one may read how many times the pc
	7	# was at a certain line in that sourcefile.
	8
	9
	10	import argparse
	11	import sys
	12	import os
	13	import re
	14	import subprocess
	15	import tempfile
	16	from collections import defaultdict
	17
	18	parser = argparse.ArgumentParser()
	19	pid_cmd_group = parser.add_mutually_exclusive_group()
	20	pid_cmd_group.add_argument("-x", "--pid", help='PID for systemtap to target.', type=int)
	21	pid_cmd_group.add_argument("-c", "--cmd", help='Command for systemtap to target.', type=str)
	22	parser.add_argument('-d', metavar="BINARY", help='Add symbol information for given binary and its shared libraries.', type=str, action='append', default=[])
	23	parser.add_argument("-e", "--events", help='Override the list of profiling probe points.', type=str, default='timer.profile')
	24	parser.add_argument("-T", "--timeout", help="Exit in 'timeout' seconds.", type=int)
	25	parser.add_argument("-p", "--print", help="Print annotated source files to stdout instead of files.", action='store_true')
	26	parser.add_argument("-w", "--context-width", metavar="WIDTH", help='Limit number of lines of context around each hit. Defaults to unlimited.', type=int, default=-1)
	27	parser.add_argument("-s", "--stap", metavar="PATH", help='Override the path to the stap interpreter.', type=str)
	28	parser.add_argument("-v", "--verbose", help="Increase verbosity.", action='count', default=0)
	29
	30	args = parser.parse_args()
	31	verbosity = args.verbose
e20ae05e	32	DB_URLS = os.getenv("DEBUGINFOD_URLS")
2aaf9213 NS	33
	34	def vprint(level,*args):
	35	if (verbosity >= level):
	36	print(*args)
	37
	38
	39	stap_script="""
	40	global count
	41	global unknown
	42	global kernel
	43	global user
	44	probe begin {
	45	system(\"echo Starting stap data collector.\") # sent to stdout of stap-profile-annotate process
	46	}
	47	probe """ + args.events + """ {
	48	if (! user_mode()) {
	49	kernel <<< 1
	50	next
	51	}
	52	try {
	53	if (target()==0 \|\| target_set_pid(pid()))
	54	{
	55	buildid = umodbuildid(uaddr());
	56	addr= umodaddr(uaddr());
	57	count[buildid,addr] <<< 1;
	58	user <<< 1
	59	}
	60	}
	61	catch /(e)/ { unknown <<< 1 /* printf ("%s", e) */ }
	62	}
	63
	64	probe timer.s(1),end
	65	{
	66	println (\"BEGIN\");
	67	foreach ( [buildid, addr] in count)
	68	{
	69	c = @count(count[buildid,addr]);
	70	println(buildid, " " , addr, " ", c);
	71	}
	72	println (\"END\");
	73	delete count
	74	}
	75	probe end,error
	76	{
	77	printf (\"Counted %d known userspace hits.\\n\", @count(user))
	78	if (@count(kernel))
	79	printf (\"Ignored %d kernel hits.\\n\", @count(kernel))
	80	if (@count(unknown))
	81	printf (\"Ignored %d unknown userspace hits.\\n\", @count(unknown))
	82	println(\"Stopped stap data collector.\")
	83	}
	84	"""
	85
	86	# buildid class
	87	class BuildIDProfile:
	88	def __init__(self,buildid):
	89	self.counts = defaultdict(lambda: 0)
	90	self.buildid = buildid
	91	self.filename = self.buildid + 'addrs.txt'
	92	self.sources = {}
	93
	94	def __str__(self):
	95	return "BuildIDProfile(buildid %s) items: %s sources: %s" % (self.buildid, self.counts.items(), self.sources.items())
	96
97	# Build the 'counts' dict by adding the hit count to its associated address
98	def accumulate(self,pc,count):
99	self.counts[pc] += count
100
101	# Get the Find the sources of relative addresses from self.counts.keys()
102	def get_sources(self):
e20ae05e	103	vprint(1,"Computing addr2line for %s" % (self.buildid))
2aaf9213 NS	104	# Used to maintain order of writing
	105	ordered_keys = list(self.counts.keys())
	106	# create addr file in /tmp/
	107	with open('/tmp/'+self.filename, 'w') as f:
	108	for k in ordered_keys:
	109	f.write(str(hex(k)) + '\n')
	110	vprint(2,"Dumped addresses")
	111	# Get source:linenum info
	112	dbginfo = self.get_debuginfo()
	113	# Split the lines into a list divided by newlines
	114	lines = dbginfo.split('\n')
	115
	116	for i in range(0,len(lines)):
	117	if lines[i] == '':
	118	continue
	119	split = lines[i].split(':')
	120	src = split[0]
	121	line_number = split[1]
	122	if line_number == None:
	123	continue
	124	if src not in self.sources.keys():
	125	self.sources[src] = SourceLineProfile(self.buildid,src)
	126
	127	# Sometimes addr2line reponds with a string of format ("linenum" discriminator "num")
	128	# trim this to yield "linenum" using a regular expression:
	129	m = re.search('[0-9]+',line_number)
	130	# If m doesn't contain the above regex, it has no number so don't accumulate it
	131	if m == None:
	132	continue
	133	line_number = int(m.group(0))
	134	# eu-addr2line gives outputs beginning at 1, where as in SourceLineProfiler.report
	135	# the line numbering begins at 0. This offset of 1 must be reomved from eu-addr2line
	136	# to ensure compatibility with SourceLineProfiler.report
	137	self.sources[src].accumulate(line_number-1, self.counts[ordered_keys[i]])
	138	vprint(2,"Mapped to %d source files" % (len(self.sources),))
	139	# Remove tempfile
	140	os.remove('/tmp/'+self.filename)
	141
	142	# Report information for this buildid's source files
	143	def report(self,totalhits):
	144	for so in self.sources.values():
	145	so.report(totalhits)
	146
	147	# Get source:linenum information. Assumes self.filename has relative address information
	148	def get_debuginfo(self):
	149	try:
	150	#Get the debuginfo of the bulidid retrieved from stap
	151	p = subprocess.Popen(['debuginfod-find', 'debuginfo', self.buildid],stdout=subprocess.PIPE)
	152	dbg_file,err = p.communicate()
	153	dbg_file = dbg_file.decode('utf-8').rstrip()
e20ae05e NS	154	if dbg_file == '' or dbg_file == None:
	155	raise Exception("No debug file for bid %s from debuginfod servers: %s" % (self.bid, DB_URLS))
	156	elif err != '' and err != None:
	157	raise Exception(err.decode('utf-8').rstrip())
2aaf9213 NS	158	vprint(2, "Stored debuginfod-find debuginfo file as %s" % (dbg_file))
	159	#Use the debuginfo attained from the above process
	160	process = subprocess.Popen(['sh','-c', 'eu-addr2line -A -e ' + dbg_file + ' < /tmp/' + self.filename], stdout=subprocess.PIPE)
	161	out,err = process.communicate()
	162	except Exception as e:
	163	print (e)
2aaf9213 NS	164	return out.decode('utf-8')
	165
	166
	167	# Contains information related to each source of a buildid
	168	class SourceLineProfile:
	169	def __init__(self, bid, source):
	170	self.bid = bid
	171	self.source = source
	172	self.counts = defaultdict(lambda: 0)
	173
	174	def __str__(self):
	175	return "SourceLineProfile(bid %s, source %s) counts: %s" % (self.bid, self.source, self.counts.items())
	176
	177	# Accumulate hits on a line
	178	def accumulate(self, line, count):
	179	self.counts[line] += count
	180
	181	# Get the source file associated with a buildid
	182	def get_source_file(self):
	183	try:
	184	p = subprocess.Popen(['debuginfod-find', 'source', self.bid, self.source],stdout=subprocess.PIPE)
	185	sourcefile,err = p.communicate()
	186	sourcefile = sourcefile.decode('utf-8').rstrip()
	187	if sourcefile == '' or sourcefile == None:
e20ae05e	188	raise Exception("No source file for bid %s, source %s from debuginfod servers: %s" % (self.bid, self.source, DB_URL))
2aaf9213 NS	189	elif err != '' and err != None:
	190	raise Exception(err.decode('utf-8').rstrip())
	191	vprint(2, "Stored debuginfod-find source file as %s" % (sourcefile))
	192	return sourcefile
	193	except Exception as e:
	194	print (e)
	195
	196	# Reporting function for the source file
	197	def report(self, totalhits):
	198	filehits=sum(self.counts.values())
	199	if self.source == '??' or self.source == '':
	200	vprint(0,"%08d (%.2f%%) hits in buildid %s with unknown source" % (filehits, filehits/totalhits*100,
	201	self.bid))
	202	return
	203	# Retrieve the sourcefile's name
	204	sourcefile = self.get_source_file()
	205	if sourcefile == None or sourcefile == '':
	206	return
	207
	208	outfile = os.path.join('profile-'+self.bid, (sourcefile.split('/')[-1]).replace('##','/'))
	209
	210	# Try creating the appropriate directory
	211	if not args.print:
	212	try:
e20ae05e NS	213	# Begins at -1 so that when the for loop counts the profile-buildid directory the
	214	# above_profile_dir is set to 0 (the intended beginning position)
	215	# This saves having to either remove profile-buildid or check for it each iteration
	216	# This variable represents how many directories we are above the profile-buildid
	217	# directory
	218	above_profile_dir = -1
	219	for word in '/'.split(outfile):
	220	if word == "..":
	221	above_profile_dir -=1
	222	else:
	223	above_profile_dir += 1
	224	if above_profile_dir < 0:
	225	raise Exception(outfile + " descends beyond its intended root directory, profile-"+self.bid+".\nEnsuring the directory remains above profile-"+self.bid+" ... ")
	226	outfile = re.sub("\/\.\.","/dotdot", outfile)
	227	if not os.path.isfile(outfile):
	228	os.makedirs(os.path.dirname(outfile))
	229	except Exception as e:
	230	print(e)
2aaf9213 NS	231
	232	# Output source code to 'outfile' and if a line has associated hits (read out of sourcefile)
	233	# then add the line number and hit count before that line. If a context_width is present use
	234	# print the surrounding lines for context in accordance with context_width
	235	vprint(0,"%07d (%.2f%%) hits in %s over %d lines." % (filehits, filehits/totalhits*100,
	236	outfile, len(self.counts)))
e20ae05e NS	237	class blob:
	238	def __init__(self, lower, upper, hit):
	239	self.lower = lower
	240	self.upper = upper
	241	self.hits = []
	242	self.hits.append(hit)
	243	def __str__(self):
	244	if self.lower != self.upper:
	245	return ("Hits: " + ', '.join(str(i) for i in self.hits) + ". Context from lines %s to %s") % (self.lower, self.upper)
	246	else:
	247	return ("Hits: " + ', '.join(str(i) for i in self.hits) + ". Context of line %s") % (self.upper)
2aaf9213	248
e20ae05e NS	249	def get_context(self):
e20ae05e NS	250	return "//" + str(self) +"\n"
2aaf9213	251
e20ae05e NS	252	num_lines = sum(1 for line in open(sourcefile,'r')) - 1
	253	with open(sourcefile,'r') as f, open(outfile, 'w') as of:
	254	hitlines = sorted( list(self.counts.keys()) )
	255	width = -1
	256	if args.context_width >= 0:
	257	width = int(args.context_width)
	258	else:
	259	width = sys.maxsize
	260	upper_bound = sys.maxsize if width == sys.maxsize else hitlines[0]+width
	261	lower_bound = -1 if width == sys.maxsize else hitlines[0] - width
	262	# Set the first upper and lower bounds
	263	context_blobs = []
	264	context_blobs.append(blob(lower_bound, upper_bound, hitlines[0]))
	265	blob_num = 0
	266	for i in hitlines[1:]:
	267	lower = i-width
	268	upper = i+width
	269	# - 1 to connect blobs bordering one another
	270	if context_blobs[blob_num].upper >= lower-1:
	271	context_blobs[blob_num].upper = upper
	272	context_blobs[blob_num].hits.append(i)
	273	else:
	274	blob_num = blob_num+1
	275	context_blobs.append(blob(lower, upper, i))
	276	context_blobs[-1].upper = num_lines if context_blobs[-1].upper > num_lines else context_blobs[-1].upper
2aaf9213	277	for linenum, line, in list(enumerate(f)):
e20ae05e NS	278	# Convenience variable
	279	hits = context_blobs[0].hits
	280	# If we've passed this blobs area of context, pop it
	281	if context_blobs and context_blobs[0].upper < linenum:
	282	context_blobs.pop(0)
	283	if not context_blobs:
	284	break
	285	# If we have reached the beginning of a blob's context,
	286	# print_context()
	287	if context_blobs and linenum == context_blobs[0].lower:
	288	of.write(context_blobs[0].get_context())
2aaf9213 NS	289
	290	# If we have found a line with hits, output info
	291	# otherwise if there is no width, don't take it into account
	292	# otherwise if the current line is within the desired width
	293	# print it for context
e20ae05e NS	294	if linenum in hits:
e20ae05e NS	295	of.write("%07d %s\n" % ( self.counts[linenum], line.rstrip()))
2aaf9213 NS	296	elif width == -1:
2aaf9213 NS	297	of.write("%7s %s\n" % ("", line))
e20ae05e NS	298	elif context_blobs[0].lower <= linenum and linenum <= context_blobs[0].upper:
e20ae05e NS	299	of.write("%7s %s\n" % ("" , line.rstrip()))
2aaf9213 NS	300
	301	if not args.print: # don't close stdout
	302	of.close()
	303
	304	def __main__():
	305	# We require $DEBUGINFOD_URLS
e20ae05e	306	if (not DB_URLS):
2aaf9213 NS	307	raise Exception("Required DEBUGINFOD_URLS is unset.")
	308
	309	# Run SystemTap
	310	(tmpfd,tmpfilename) = tempfile.mkstemp()
	311	stap_cmd = "@prefix@/bin/stap" # not @ bindir @ because autoconf expands that to shell $var expressions
	312	stap_args = ['--ldd', '-o'+tmpfilename]
	313
	314	if args.cmd:
	315	stap_args += ['-c', args.cmd]
	316	if args.timeout:
	317	if args.timeout < 0:
	318	raise Exception("Timeout must be positive")
	319	stap_args += ['-T', str(args.timeout)]
	320	if args.pid:
	321	if args.pid < 0:
	322	raise Exception("pid must be positive")
	323	stap_args += ['-x', str(args.pid)]
	324	for d in args.d:
	325	stap_args += ['-d', d]
	326	if args.stap:
	327	stap_cmd = args.stap
	328	if args.context_width and args.context_width < -1:
	329	raise Exception("context_width must be positive or -1 (for all file)")
	330	stap_args += ['-e', stap_script]
	331
	332	vprint(1,"Building stap data collector.")
	333	vprint(2,"%s %s" % (stap_cmd, stap_args))
	334
	335	try:
	336	p = subprocess.Popen([stap_cmd] + stap_args)
	337	p.communicate() # wait until process exits
	338	except KeyboardInterrupt:
	339	pass
	340	p.kill()
	341
	342	buildids = {} # dict from buildid hexcode to BuildIdProfile object
	343
	344	outp_begin = False
	345	proflines = 0
	346	totalhits = 0
	347
	348	for line in open(tmpfilename,"r"): # read stap output, text mode
	349	line = line.rstrip()
	350	# All relevant output is after BEGIN and before END
	351	if "BEGIN" in line:
	352	outp_begin = True
	353	elif "END" in line:
	354	outp_begin = False
	355	elif outp_begin == False:
	356	if line != "": # diagnostic message
	357	vprint(0,line)
	358	else:
	359	pass
	360	else: # an actual profile record
	361	try:
	362	proflines += 1
	363	(buildid,pc,hits) = line.split()
	364	vprint(3,"(%s,%s,%s)" % (buildid,pc,hits))
	365	totalhits += int(hits)
	366	bidp = buildids.setdefault(buildid, BuildIDProfile(buildid))
	367	# Accumulate hits for offset pc
	368	bidp.accumulate(int(pc),int(hits))
	369	except Exception as e: # parse error?
	370	vprint(2,e)
371
372	os.remove(tmpfilename)
373
374	vprint(0, "Consumed %d profile records of %d hits across %d buildids." % (proflines, totalhits, len(buildids)))
375
376	# Output source information for each buildid
377	totalhits = sum([sum(bid.counts.values()) for bid in buildids.values()])
378	for buildid, bidp in buildids.items():
379	bidp.get_sources()
380	bidp.report(totalhits)
381
382	if __name__ == '__main__':
383	__main__()