This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Perfmon systemtap runtime support


In-Reply-To: <44B6C74E.7040008@redhat.com>

On Thu, 13 Jul 2006 18:21:02 -0400, William Cohen wrote:
> 
> I have been working on getting some performance monitoring support into 
> systemtap. The perfmon1.diff patch is a very simple addition to the 
> runtime. It just has functions to setup the perfmon monitoring hardware, 
> read a counter, and shutdown the performance monitoring hardware. It 
> uses the perfmon2 kernel ABI to configure the hardware.
> 
> I have completed changes to the translator to use the runtime functions. 
> I took Marin's suggestion of using guru mode to allow access to the 
> various C functions and wrote some examples that used the runtime functions.
> 
> The cost is relatively high for accessing the counters. Below is the 
> output from p2x.stp, counting the number of cycles between consecutive 
> calls to read the cycle count:
> 
> [wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g  p2x.stp
> interval = 15491

I modified your pure-C sample module from a while ago, and I get an overhead
of 500-700 cycles:

[root@tu kpfm_test3]# insmod ./kpfm_test3.ko ; rmmod kpfm_test3
val0 = 998005, val1 = 998533, interval = 528

---------------- Makefile ----------------
# Default target (first rule in the file): build the module out of tree by
# running make in the kernel build directory with M pointing at this
# directory. The kernel version in the path is hard-coded.
buildtest:
	make -C /lib/modules/2.6.17.1-32-pfmon/build M=`pwd` modules

# Object list consumed by the kernel build system; kpfm_test3.o is built
# as a loadable module.
obj-m += kpfm_test3.o

# Remove build products, editor backup files and kbuild bookkeeping files.
clean:
	/bin/rm -rf *.o *.ko *~ *.mod.c .*.cmd .tmp_versions Modules.symvers

---------------- README ----------------
This is a simple example showing how the perfmon2 kernel ABI (kabi) works.
It counts cpu_clock_unhalted events. It is currently set up only for
AMD64 and will not work on other processors without modification.

To run:

# make
# modprobe perfmon_{arch}
# /sbin/insmod ./kpfm_test3.ko
# /sbin/rmmod kpfm_test3
(should print out information in /var/log/messages)
# tail /var/log/messages

Modified to count how many clocks it takes to read the PMD registers.

---------------- kpfm_test3.c ----------------
/*
 * kpfm_test3.c
 *
 * Copyright (c) 2006 Red Hat
 * 		Contributions by William Cohen <wcohen@redhat.com>
 *		Modified by Chuck Ebbert <76306.1226@compuserve.com>
 *
 * A simple program to test overhead of reading perfmon counters.
 *
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/config.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sysctl.h>

#include <linux/perfmon.h>

MODULE_AUTHOR("William Cohen <wcohen@redhat.com>");
MODULE_DESCRIPTION("kpfm_test3 module to exercise perfmon2 KABI");
MODULE_LICENSE("GPL");

/*
 * File-scope perfmon state shared by the init and cleanup paths.
 * All of these are static objects, so they start out zero-initialized.
 */
/* context request; cpu_pfm_init() sets PFM_FL_SYSTEM_WIDE in ctx_flags */
static struct pfarg_ctx req;
/* completion handed to pfmk_create_context() */
static struct completion c;
/* context handle: set by pfmk_create_context(), passed to every other pfmk_* call */
static void *desc;
/* argument block for pfmk_load_context(); left at its zero defaults */
static struct pfarg_load load_args;
/* argument block for pfmk_start(); left at its zero defaults */
static struct pfarg_start start_args;

/* Set things up for AMD64.
 *
 * The *_BIT macros below are flag bits OR'ed into the value written to a
 * PMC (see pc[] further down). Only OS_BIT, EN_BIT and INT_BIT are used
 * in this file; the others are defined for completeness.
 */
/* monitor events in user mode (CPL > 0) */
#define USR_BIT (1<<16)
/* monitor events in system mode (CPL == 0) */
#define OS_BIT (1<<17)
/* edge detect */
#define EDGE_BIT (1<<18)
/* pin control */
#define PC_BIT (1<<19)
/* generate APIC interrupt on overflow */
#define INT_BIT (1<<20)
/* enable counter */
#define EN_BIT (1<<22)
/* invert counter mask */
#define INV_BIT (1<<23)

/* number of entries in pd[] */
#define NUM_PMD 1
/* start with reg 1 (0 is used by softlockup on x86_64) */
#define FIRST_PMD 1
/*
 * PMD table: written once by pfm_register_setup() with an initial value
 * of 0, then re-read by cpu_pfm_cleanup(), which overwrites reg_value
 * with the value read back.
 */
static struct pfarg_pmd pd[] = {
	{.reg_num = FIRST_PMD,
	 .reg_value = 0,
	},
};
/* element count passed to pfmk_write_pmds()/pfmk_read_pmds() */
static int num_pfm_pmd = NUM_PMD;

/* number of entries in pc[] */
#define NUM_PMC 1
/* same register index as FIRST_PMD */
#define FIRST_PMC 1
/* event 0x76 = number of unhalted CPU clocks */
#define EVT_SEL 0x76
/*
 * PMC table: selects event EVT_SEL, counted in system mode only
 * (OS_BIT set, USR_BIT clear), with the counter enabled and the
 * interrupt-on-overflow bit set.
 */
static struct pfarg_pmc pc[] = {
	{.reg_num = FIRST_PMC,
	 .reg_value = EVT_SEL | OS_BIT | EN_BIT | INT_BIT,
	},
};
/* element count passed to pfmk_write_pmcs() */
static int num_pfm_pmc = NUM_PMC;


static int pfm_register_setup(void)
{
	/* for the time being hard coded the events to monitor */
	int err = 0;

	err = pfmk_write_pmcs(desc, pc, num_pfm_pmc);
	if (err)
		goto out;
	
	err = pfmk_write_pmds(desc, pd, num_pfm_pmd);

 out:	
	return err;
}

static int cpu_pfm_init(void)
{
	int err = 0;

	/* set up context information */
	/* only does system-wide contexts */
	req.ctx_flags |= PFM_FL_SYSTEM_WIDE;

	err = pfmk_create_context(&req, NULL, 0, &c, &desc, NULL);
	if (err)
		goto cleanup;

	err = pfm_register_setup();
	if (err)
		goto cleanup2;

	err = pfmk_load_context(desc, &load_args);
	if (err) {
		printk("pfmk_load_context error\n");
		goto cleanup2;
	}

	err = pfmk_start(desc, &start_args);
	if (err) {
		printk("pfmk_start error\n");
		goto cleanup3;
	}

	return err;

 cleanup3:
	pfmk_unload_context(desc);
 cleanup2:
	pfmk_close(desc);
 cleanup:
	return err;
}

/*
 * Module load hook (registered with module_init): sets up perfmon
 * monitoring and passes the setup status straight back to the caller.
 */
static int kpfm_test3_init_module(void)
{
	int status = cpu_pfm_init();

	return status;
}

static void cpu_pfm_cleanup(void)
{
	long long a;

	/* read pmds twice in a row and see how many
	 * clock cycles elapse between reads
	 */	
	if (pfmk_read_pmds(desc, pd, num_pfm_pmd))
		printk( "pfm_read_pmds error\n");
	a = pd[0].reg_value;
	if (pfmk_read_pmds(desc, pd, num_pfm_pmd))
		printk( "pfm_read_pmds error\n");

	printk ("val0 = %lld, val1 = %lld, interval = %lld\n",
		a, pd[0].reg_value, pd[0].reg_value - a);

	if (pfmk_stop(desc))
		printk("pfmk_stop error\n");
	if (pfmk_unload_context(desc))
		printk ("pfmk_unload_context error\n");
	if (pfmk_close(desc))
		printk ("pfmk_unload_context error\n");;
}

/*
 * Module unload hook (registered with module_exit): delegates all the
 * work to cpu_pfm_cleanup().
 */
static void kpfm_test3_cleanup_module(void)
{
	cpu_pfm_cleanup();
}

/* Register the load and unload entry points defined above. */
module_init(kpfm_test3_init_module);
module_exit(kpfm_test3_cleanup_module);

-- 
Chuck
 "You can't read a newspaper if you can't read."  --George W. Bush


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]