This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Simplistic profiling with the runtime


All --

Enclosed is a simplistic example of using the runtime to generate time
based profiling data. In theory, the code will work on an SMP system
(taking interrupts on all CPU's), but I'm on a Pentium M laptop, so I
never tried that...

The system was running multiple windows, during the test run, one window
had a "spinloop" app was running to make sure the cpu was never idle
while I ran some other programs on the various other windows. Source is
below along with the corresponding output.

In dtrace, there is a distinction made between code to be run via the OS
timer (one interrupt on a random CPU) vs. code to be run via the profile
timer (one interrupt on all CPUs). (See
http://docs.sun.com/app/docs/doc/817-6223/6mlkidljh?a=view).

Has any discussion happen on what we want the systemtap translator
syntax to look like in this regard? I'd be interested in taking a stab
at what it would take to get the translator to handle the os timer
version of the interrupt if this hasn't already been done.

-- charles

[root@cspiraki-lnote profile_timer]# cat profile_timer.c
/* Use 2.6 profile timer (register_timer_hook) as an smp capable timer
*/

/* Demonstrates the beginnings of a generic framework for */
/* asynchronous probes using the runtime */

/* os includes */
#include "linux/profile.h"

/* How many strings to allocate. see strings.c. Default is 0. */
#define STP_NUM_STRINGS 1

/* maximum size for a string. default is 2048 */
#define STP_STRING_SIZE 2048

/* size of strings saved in maps */
#define MAP_STRING_LENGTH 256

/* width of histograms. Default 50 */
#define HIST_WIDTH 50

/* always include this.  Put all non-map defines above it. */
#include "runtime.h"

/* @todo since we don't have aggregation maps yet, try regular maps */
#define NEED_INT64_VALS

#define KEY1_TYPE STRING
#define KEY2_TYPE INT64
#define KEY3_TYPE STRING
#define KEY4_TYPE INT64
#include "map-keys.c"

#include "map.c"

#include "stat.c"
#include "stack.c"

MODULE_DESCRIPTION("SystemTap probe: profile_timer");
MODULE_AUTHOR("Charles Spirakis <charles.spirakis@intel.com>");

Stat timing;
MAP cur_addr;

/* Would probably be better to have the kernel code do a lookup_dcookie
(ala */
/* oprofile) and save the dcookie here and then have user mode code */
/* dereference it (also, ala oprofile). But that would require changes
to */
/* the runtime (possibly a new type? dcookie?) */
/* for the moment, just do it all here as an experiment */

char *get_image_path(struct mm_struct *mm, unsigned long addr, off_t
*offset, char *full_path, u32 len)
{
    struct vm_area_struct *vma;
    char *ret_val;

    for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
        if ((addr < vma->vm_start) || (addr >= vma->vm_end)) {
            continue;
        }

        if (vma->vm_file) {
            *offset = (vma->vm_pgoff << PAGE_SHIFT) + addr -
vma->vm_start;
            ret_val = d_path(vma->vm_file->f_dentry,
                             vma->vm_file->f_vfsmnt,
                             full_path,
                             len);

#if 0
            printk(KERN_INFO "d_path: %p, %p, <%s>\n",
                    ret_val,
                    full_path,
                    ret_val);
#endif

            if (IS_ERR(ret_val)) {
                ret_val = NULL;
            }
        }
        else {
            *offset = addr;
            ret_val = NULL;
        }

        break;
    }

    return ret_val;
}

/* A generic asynchorous probe entry point */
void inst_async(struct pt_regs *regs)
{
    u64 start;
    u64 end;
    unsigned long ip;
    char *path;
    off_t offset;

    /* This is smp safe, but the stack alloc's on each interrupt takes
time */
    /* ah well, just a prototype, worry about a better way to do it
later */
    char full_path[512];

    rdtscll(start);
    ip = REG_IP(regs);

    if (user_mode(regs)) {
        path = get_image_path(current->mm, ip, &offset, full_path,
sizeof(full_path));
        if (path) {
            _stp_map_key_str_int64_str_int64(cur_addr, current->comm,
ip, path, offset);
        }
        else {
            _stp_map_key_str_int64_str_int64(cur_addr, current->comm,
ip, "unknown", offset);
        }
    }
    else {
        _stp_map_key_str_int64_str_int64(cur_addr, current->comm, ip,
"vmlinux", ip);
    }

    /* Create a map of interrupted addresses seen */
    /* This is where a "write-only" mapping concept would be useful */
    /* (as is done in dtrace) since the only readers of this data are */
    /* going to be in user-mode doing post-processing. It would allow */
    /* the runtime to do final aggregation in user space and allow */
    /* us to avoid defining the size of the map ahead of time */
    /* which is required now */
    _stp_map_add_int64(cur_addr, 1);

    /* We will want _stp_stack() and _stp_ustack() to work here */
    /* at some point in the future... */

    rdtscll(end);
    _stp_stat_add(timing, end - start);
}

/* Helper function to convert from profile timer callback into */
/* generic asynchronous entry point form */
/* In this case, there is nothing to do and according to Frank, it */
/* isn't needed. But still here because that was they way I initially */
/* did things... It is only a sample/prototype, after all... */
static void timer_callback(struct pt_regs *regs)
{
    /* Call the asynchronous probe with a ptregs struct */
    inst_async(regs);
}

/* called when the module loads. */
int probe_start(void)
{
    int ret;

    timing = _stp_stat_init(HIST_LINEAR, 0, 5000, 250);

    cur_addr = _stp_map_new_str_int64_str_int64(10000, INT64);

    /* register the profile timer */
    ret = register_timer_hook(timer_callback);
    if (!ret) {
        printk(KERN_ERR "couldn't hook timer callback\n");
    }

    return ret;
}

void probe_exit (void)
{
    /* unregister the os_timer */
    unregister_timer_hook(timer_callback);

    /* print out any colledted data, etc */
    _stp_stat_print (timing, "timing (cpu cycles): # calls:%C  avg:%A
min:%m  max:%M\n%H", 0);

    _stp_print("Process\tIp\tImage\tOffset\tCount\n");
    _stp_map_print (cur_addr, "%1s\t%2P\t%3s\t%4p\t%d");
    _stp_map_del(cur_addr);

    _stp_print_flush();
}

[root@cspiraki-lnote profile_timer]# time stpd profile_timer.ko > pt.out

real    0m7.134s
user    0m0.005s
sys     0m0.014s

[root@cspiraki-lnote profile_timer]# wc pt.out
  209  1073 11804 pt.out

[root@cspiraki-lnote profile_timer]# cat pt.out
timing (cpu cycles): # calls:5949  avg:1272  min:448  max:12500
value |-------------------------------------------------- count
    0 |                                                      0
  250 |                                                      1
  500 |                                                      0
  750 |@@@@@@@@@@@@@@@@                                   1068
 1000 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@  3240
 1250 |@@@@@@@@@@@@@@@@                                   1073
 1500 |@@                                                  177
 1750 |@                                                    92
 2000 |@                                                    71
 2250 |                                                     42
 2500 |                                                     17
 2750 |                                                     17
 3000 |                                                     15
 3250 |                                                      6
 3500 |                                                     10
 3750 |                                                      8
 4000 |                                                      7
 4250 |                                                     10
 4500 |                                                      9
 4750 |@                                                    86

Process Ip      Image   Offset  Count
stpd    0xc011beda : __wake_up+0x74/0x14d       vmlinux c011beda
1
udev    0xc015c363 : kmem_cache_alloc+0x29/0x49 vmlinux c015c363
1
hald    0x80588bb       /usr/sbin/hald  000118bb        1
default.hotplug 0x80993f4       /bin/bash       000523f4        1
default.hotplug 0x49e2a9        /lib/libc-2.3.5.so      000632a9
1
default.hotplug 0x805fcfc       /bin/bash       00018cfc        1
default.hotplug 0x8084dd5       /bin/bash       0003ddd5        1
spinloop        0x804849d       /home/cspiraki/sep_testing/spinloop
0000049d        709
spinloop        0x804848a       /home/cspiraki/sep_testing/spinloop
0000048a        308
spinloop        0x8048496       /home/cspiraki/sep_testing/spinloop
00000496        214
spinloop        0x80484ba       /home/cspiraki/sep_testing/spinloop
000004ba        304
spinloop        0x80484cd       /home/cspiraki/sep_testing/spinloop
000004cd        765
spinloop        0x80484fd       /home/cspiraki/sep_testing/spinloop
000004fd        744
spinloop        0x80484ea       /home/cspiraki/sep_testing/spinloop
000004ea        324
spinloop        0x80484f4       /home/cspiraki/sep_testing/spinloop
000004f4        201
spinloop        0x8048464       /home/cspiraki/sep_testing/spinloop
00000464        191
spinloop        0x804846d       /home/cspiraki/sep_testing/spinloop
0000046d        695
spinloop        0x8048494       /home/cspiraki/sep_testing/spinloop
00000494        214
spinloop        0x80484c4       /home/cspiraki/sep_testing/spinloop
000004c4        220
spinloop        0x804845a       /home/cspiraki/sep_testing/spinloop
0000045a        291
spinloop        0x80484c6       /home/cspiraki/sep_testing/spinloop
000004c6        156
spinloop        0x80484f6       /home/cspiraki/sep_testing/spinloop
000004f6        164
spinloop        0x8048466       /home/cspiraki/sep_testing/spinloop
00000466        259
hald-addon-stor 0xc02ae2f8 : ide_find_setting_by_ioctl+0x14/0x1f
vmlinux
c02ae2f8        1
gnome-terminal  0x72b6ef        /usr/lib/libgobject-2.0.so.0.600.4
000116ef        1
gnome-terminal  0x4a6bbe        /lib/libc-2.3.5.so      0006bbbe
1
gnome-terminal  0x7ed29a        /lib/libpthread-2.3.5.so        0000429a
2
spinloop        0xc030795d : net_rx_action+0x1a2/0x2bf  vmlinux c030795d
1
Xvnc    0x81297a1       /usr/bin/Xvnc   000e37a1        1
Xvnc    0xc017c8cf : fget+0x7d/0x164    vmlinux c017c8cf        1
spinloop        0xc01282bc : __do_softirq+0x2c/0x8a     vmlinux c01282bc
2
Xvnc    0x932402        unknown 00932402        2
gnome-terminal  0x30e6a8        /usr/lib/libvte.so.4.4.0        0001f6a8
1
gnome-terminal  0xc025c725 : n_tty_chars_in_buffer+0x0/0x192    vmlinux
c025c725        1
metacity        0x7209c9        /usr/lib/libgobject-2.0.so.0.600.4
000069c9        1
Xvnc    0xc036c245 : packet_rcv_spkt+0x12b/0x371        vmlinux c036c245
1
spinloop        0xc0334e46 : tcp_ack_no_tstamp+0x2b/0x3e        vmlinux
c0334e46        1
metacity        0x65f410        /usr/X11R6/lib/libX11.so.6.2    0002c410
1
gnome-terminal  0x49f0db        /lib/libc-2.3.5.so      000640db
1
Xvnc    0xc011aa60 : kmap_atomic+0x4e/0x66      vmlinux c011aa60
1
gnome-terminal  0xbfdf59        /usr/X11R6/lib/libXft.so.2.1.2  0000df59
1
Xvnc    0x4a66f3        /lib/libc-2.3.5.so      0006b6f3        1
Xvnc    0x81215a7       /usr/bin/Xvnc   000db5a7        1
Xvnc    0x49f033        /lib/libc-2.3.5.so      00064033        1
... and more. Truncated to keep the email reasonable ...
... Profiling can generate a lot of data             ...


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]