This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
a different approach to accessing args from return probe
- From: Kevin Stafford <kevinrs at us dot ibm dot com>
- To: systemtap at sources dot redhat dot com
- Date: Mon, 26 Sep 2005 10:05:26 -0700
- Subject: a different approach to accessing args from return probe
> I ran into a problem when considering system call tapsets. There are
a significant amount of "getter" system calls that take user space
pointer args that are eventually assigned in the function. Heres an example:
>
> asmlinkage long sys_time(int __user * tloc) {
> int i;
> struct timeval tv;
> do_gettimeofday(&tv);
> i = tv.tv_sec;
> if (tloc) {
> if (put_user(i,tloc))
> i = -EFAULT;
> }
> return i;
> }
>
> Here is what an appropriate tapset may look like for such a function:
>
> probe kernel.syscall.time = kernel.function("sys_time") {
> /* nothing to export, the param is
> meaningless to me at this point */
> }
>
> probe kernel.syscall.time.return =
> kernel.function("sys_time").return {
> /* now that the buf is
> assigned, export it */
> time = get_uint_ptr($tloc);
> retval = get_eax();
> }
>
> This return probe will produce bogus results because by the time the
return handler is executed, reference to the arg(s) have been clobbered.
This is a problem that must be addressed as this type of behavior is
prevalent in system calls.
>
> Here is a workaround using two probes and a global as a record keeper:
>
> global time_tloc_addr
>
> probe kernel.syscall.time = kernel.function("sys_time") {
> /* save the addr of the arg while I can */
> time_tloc_addr[pid()] = $tloc
> }
>
> probe kernel.syscall.time.return =
> kernel.function("sys_time").return {
> time = get_uint_ptr(time_tloc_addr[pid()]);
> retval = get_eax();
> }
>
> Indeed, this is ugly. It requires two probes to accomplish one task,
and there are globals everywhere. Of course another method is to simply
not allow access to params at the return probe.
The attached patch to kprobes provides a convenient way to remember the
original values of a function's args and make them available to the
return-probe handler. It adds these members to struct kretprobe and
struct kretprobe_instance:
/* added to struct kretprobe */
kretprobe_handler_t entry_handler;
size_t entry_info_sz;
/* added to struct kretprobe_instance */
char entry_info[0];
The extra kretprobe_handler_t field allows a user to optionally specify
a callback to execute at the entry point of the function on which the
return probe is registered. Also optional, the size_t field provides a
way for the user to allocate additional memory to the return probe
instance, which can then be accessed through
kretprobe_instance->entry_info. With this mechanism in place, a user is
able to associate extra data with each return probe. For example, one
could save the function arguments before they are clobbered at the
return. i.e.
struct read_args {
int fd;
char __user *buf;
size_t count;
};
static int read_entry_hdlr(struct kretprobe_instance *ri, struct pt_regs
*regs)
{
/* jprobes's smarts would be nice here. */
/* i386-specific */
struct read_args *args;
unsigned long *stack;
args = (struct read_args *) ri->entry_info;
stack = ®s->esp;
/* stack[0] = return address */
args->fd = (int) stack[1];
args->buf = (char __user*) stack[2];
args->count = (size_t) stack[3];
return 0;
}
static int read_return_hdlr(struct kretprobe_instance *ri, struct
pt_regs *regs)
{
struct read_args *args;
args = (struct read_args *) ri->entry_info;
printk("#%d: On return: fd=%d, buf=%p, count=%zd\n",
ncalls, args->fd, args->buf, args->count);
return 0;
}
struct kretprobe rp = {
.kp.addr = (kprobe_opcode_t*) sys_read,
.handler = read_return_hdlr,
.entry_handler = read_entry_hdlr,
.entry_info_sz = sizeof(struct read_args),
.maxactive = 20
};
If the translator was taught to exploit this feature it would reduce the
incessant use of globals in return probe tapsets to store arg references
(see first proposed solution above). Likewise, it would relieve the
probe script authors from doing this type of thing. How should this be
handled in tapset code?
--
Kevin Stafford
DES 2 | MS 2M3
Beaverton - OR
Linux Technology Center
IBM Systems & Technology
Phone: 1-503-578-3039
Email: kevinrs@us.ibm.com
diff -urN linux-2.6.13/arch/i386/kernel/kprobes.c linux/arch/i386/kernel/kprobes.c
--- linux-2.6.13/arch/i386/kernel/kprobes.c 2005-08-28 16:41:01.000000000 -0700
+++ linux/arch/i386/kernel/kprobes.c 2005-09-23 14:25:28.000000000 -0700
@@ -137,6 +137,9 @@
ri->task = current;
ri->ret_addr = (kprobe_opcode_t *) *sara;
+ if (rp->entry_handler)
+ rp->entry_handler(ri, regs);
+
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
diff -urN linux-2.6.13/include/linux/kprobes.h linux/include/linux/kprobes.h
--- linux-2.6.13/include/linux/kprobes.h 2005-08-28 16:41:01.000000000 -0700
+++ linux/include/linux/kprobes.h 2005-09-23 14:25:39.000000000 -0700
@@ -119,13 +119,19 @@
* can be active concurrently.
* nmissed - tracks the number of times the probed function's return was
* ignored, due to maxactive being too low.
+ * entry_handler - optional handler function that will execute during the
+ * kretprobe setup (the entry point to the function)
+ * entry_info_size - the amount of extra memory the user would like to
+ * allocate to each kretprobe_instance.
*
*/
struct kretprobe {
struct kprobe kp;
kretprobe_handler_t handler;
+ kretprobe_handler_t entry_handler;
int maxactive;
int nmissed;
+ size_t entry_info_sz;
struct hlist_head free_instances;
struct hlist_head used_instances;
};
@@ -136,6 +142,8 @@
struct kretprobe *rp;
kprobe_opcode_t *ret_addr;
struct task_struct *task;
+ /* pointer to extra memory allocated by kretprobe.entry_info_sz */
+ char entry_info[0];
};
#ifdef CONFIG_KPROBES
diff -urN linux-2.6.13/kernel/kprobes.c linux/kernel/kprobes.c
--- linux-2.6.13/kernel/kprobes.c 2005-08-28 16:41:01.000000000 -0700
+++ linux/kernel/kprobes.c 2005-09-23 14:26:49.000000000 -0700
@@ -522,7 +522,8 @@
INIT_HLIST_HEAD(&rp->used_instances);
INIT_HLIST_HEAD(&rp->free_instances);
for (i = 0; i < rp->maxactive; i++) {
- inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL);
+ inst = kmalloc((sizeof(struct kretprobe_instance)
+ + rp->entry_info_sz), GFP_KERNEL);
if (inst == NULL) {
free_rp_inst(rp);
return -ENOMEM;