This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC 1/3] Kprobes: userspace probes new interfaces


This patch adds two new interfaces to insert and remove userspace probes.
This new interface uses two userspace probe objects.
Objects:
-------
Each user space probe is uniquely identified by the
combination of inode and offset. The new user space probes
interface defines two objects:
1. struct uprobe - per probe
2. struct uprobe_module - per application that has probes on 
   it. This is allocated internally per application.

	
	struct uprobe {
		/*path of the application */
		char name[50]; 
		/*includes kprobe structure */
		struct kprobe kp;
		/*list of probe per module */
		struct hlist_node ulist;
		/*inode of the application */
		struct inode *inode;
		/* offset of the probe within the application file */
		unsigned long offset; 
		/*page containing probes */
		struct page *page;
		/* virtual memory area containing the probes */
		struct vm_area_struct *vma;
	};

	struct uprobe_module {
		/* hlist head containing list of probes within this module */
		struct hlist_head ulist_head;
		/* list of uprobe_modules */
		struct list_head mlist;
		/* inode of the application on which probes are implanted */
		struct inode *inode;
		/* to hold path/dentry etc. */
		struct nameidata nd;
	};

Interfaces :
------------
Two new interfaces are defined to insert and remove user
space probes:
register_userspace_probe(struct uprobe *);
unregister_userspace_probe(struct uprobe *);
	 
register_userspace_probe() accepts a pointer to struct uprobe.
The user has to allocate the uprobe structure and initialize the
following elements:
	name		- contains the path of the application;
	offset		- offset of the probe within the application file;
	kp.addr 	- virtual address within the executable.
	kp.pre_handler 	- handler to be executed when probe is fired.
	kp.post_handler - handler to be executed after single stepping 
			  the original instruction.
	kp.fault_handler- handler to be executed if fault occurs while 
			  executing the original instruction or the 
			  handlers.

page and vma are used internally by register_userspace_probe().
	
unregister_userspace_probe(struct uprobe *);

register_userspace_probe() first walks through the given path of the
application and gets the inode of the application where the probes are
to be inserted. A struct uprobe_module is allocated for each
application on which probes are to be inserted. If the probes are to
be inserted on an application which already has probes on it, then
the same struct uprobe_module is used. The uprobe structure is also
added to ulist_head within the uprobe_module. It then walks through the
list of process private mappings and gets the virtual memory area
containing the offset, and gets a reference to the hashed page containing
the offset. If the page is found, it checks that the page is uptodate and
locks the page in memory so that the page is not swapped out. It then
maps the page, inserts the probe and unmaps the page, and finally
releases the page lock. Thus probes are inserted in the pages that are
already read into memory at the time of registration.

unregister_userspace_probe() gets the vma and the page containing the
offset of the user space probe. It maps the page containing the probe,
replaces the int3/breakpoint instruction with the original
instruction and then unmaps the page. It removes the uprobe structure
from ulist_head and checks whether the uprobe is the last one within
this module to be unregistered. If the uprobe is the last one within the
application to be unregistered, it removes the uprobe_module from the
uprobe_module_list. If there are no references to the uprobe_module
struct, it is freed.

Usage:
Usage is similar to kprobe.
	/* Allocate a uprobe structure */
	struct uprobe p;

	/* Define pre handler */
	int handler_pre(struct kprobe *p, struct pt_regs *regs)
	{
		<.............collect useful data..............>
	}

	void handler_post(struct kprobe *p, struct pt_regs *regs, 
							unsigned long flags)
	{
		<.............collect useful data..............>
	}

	int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
	{
		<.............collect useful data..............>
	}

	While inserting the probe, specify the pathname of the application
	on which probes are to be inserted.

	char pname[] ="/tmp/MOD/appln";

	/* pre_handler */
	p.kp.pre_handler=handler_pre;
	/* post_handler */
	p.kp.post_handler=handler_post;
	/* fault_handler */
	p.kp.fault_handler=handler_fault;
	/* Specify the offset of the probe within the application file */
	p.offset = (kprobe_opcode_t *)0x4d4;
	/* Specify the address within the application/executable */
	/* $nm appln |grep func1 */
	p.kp.addr = (kprobe_opcode_t *)0x80484d4;
	/* copy the string name to p.name */
	strcpy(p.name, pname);
	/* Now register the userspace probe */
	register_userspace_probe(&p);


	/* To unregister the registered probe, just call.. */
	unregister_userspace_probe(&p);

Signed-off-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>


---

 linux-2.6.13-prasanna/arch/i386/kernel/kprobes.c |   14 +
 linux-2.6.13-prasanna/fs/namei.c                 |   12 
 linux-2.6.13-prasanna/include/linux/kprobes.h    |   25 +
 linux-2.6.13-prasanna/include/linux/namei.h      |    1 
 linux-2.6.13-prasanna/kernel/kprobes.c           |  303 ++++++++++++++++++++++-
 5 files changed, 344 insertions(+), 11 deletions(-)

diff -puN kernel/kprobes.c~kprobes_userspace_probes-newinterface kernel/kprobes.c
--- linux-2.6.13/kernel/kprobes.c~kprobes_userspace_probes-newinterface	2005-09-14 11:00:19.987408280 +0530
+++ linux-2.6.13-prasanna/kernel/kprobes.c	2005-09-14 11:00:53.943246208 +0530
@@ -37,6 +37,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/namei.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/kdebug.h>
@@ -46,6 +47,7 @@
 
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
+static struct list_head uprobe_module_list;
 
 unsigned int kprobe_cpu = NR_CPUS;
 static DEFINE_SPINLOCK(kprobe_lock);
@@ -417,7 +419,11 @@ static int register_aggr_kprobe(struct k
 /* kprobe removal house-keeping routines */
 static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags)
 {
-	arch_disarm_kprobe(p);
+	if (!kernel_text_address((unsigned long)p->addr)) {
+		struct uprobe *up = container_of(p, struct uprobe, kp);
+		arch_disarm_uprobe(up);
+	} else
+		arch_disarm_kprobe(p);
 	hlist_del(&p->hlist);
 	spin_unlock_irqrestore(&kprobe_lock, flags);
 	arch_remove_kprobe(p);
@@ -434,30 +440,71 @@ static inline void cleanup_aggr_kprobe(s
 		spin_unlock_irqrestore(&kprobe_lock, flags);
 }
 
+/*
+ * This routine checks if the probe already exists at the given offset and
+ * inode. Returns the matching pointer, if found.
+ */
+static struct kprobe *get_kprobe_user(struct kprobe *kp)
+{
+	struct uprobe *up = container_of(kp, struct uprobe, kp);
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+
+	head = &kprobe_table[hash_long((unsigned long)up->inode * up->offset,
+				       KPROBE_HASH_BITS)];
+	hlist_for_each_entry(p, node, head, hlist) {
+		struct uprobe *user = container_of(p, struct uprobe, kp);
+		if (user->inode == up->inode && user->offset == up->offset)
+			return p;
+	}
+	return NULL;
+}
+
+static kprobe_opcode_t *insert_kprobe_user(struct kprobe *p)
+{
+	kprobe_opcode_t *addr;
+	struct uprobe *up = container_of(p, struct uprobe, kp);
+	addr = (kprobe_opcode_t *)
+		((unsigned long)(up->inode) * (unsigned long)(up->offset));
+	if ((up->page) && (PageUptodate(up->page))) {
+		arch_copy_kprobe(p);
+		arch_arm_uprobe(up);
+	}
+	return addr;
+}
+
 int register_kprobe(struct kprobe *p)
 {
 	int ret = 0;
 	unsigned long flags = 0;
 	struct kprobe *old_p;
+	kprobe_opcode_t *addr;
 
 	if ((ret = arch_prepare_kprobe(p)) != 0) {
 		goto rm_kprobe;
 	}
 	spin_lock_irqsave(&kprobe_lock, flags);
-	old_p = get_kprobe(p->addr);
+	if (!kernel_text_address((unsigned long)p->addr))
+		old_p = get_kprobe_user(p);
+	else
+		old_p = get_kprobe(p->addr);
 	p->nmissed = 0;
 	if (old_p) {
 		ret = register_aggr_kprobe(old_p, p);
 		goto out;
 	}
 
-	arch_copy_kprobe(p);
 	INIT_HLIST_NODE(&p->hlist);
-	hlist_add_head(&p->hlist,
-		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
-
-  	arch_arm_kprobe(p);
-
+	if (!kernel_text_address((unsigned long)p->addr))
+		addr = insert_kprobe_user(p);
+	else {
+		addr = p->addr;
+		arch_copy_kprobe(p);
+  		arch_arm_kprobe(p);
+	}
+  	hlist_add_head(&p->hlist,
+		       &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]);
 out:
 	spin_unlock_irqrestore(&kprobe_lock, flags);
 rm_kprobe:
@@ -472,7 +519,10 @@ void unregister_kprobe(struct kprobe *p)
 	struct kprobe *old_p;
 
 	spin_lock_irqsave(&kprobe_lock, flags);
-	old_p = get_kprobe(p->addr);
+	if (!kernel_text_address((unsigned long)p->addr))
+		old_p = get_kprobe_user(p);
+	else
+		old_p = get_kprobe(p->addr);
 	if (old_p) {
 		if (old_p->pre_handler == aggr_pre_handler)
 			cleanup_aggr_kprobe(old_p, p, flags);
@@ -501,6 +551,236 @@ void unregister_jprobe(struct jprobe *jp
 	unregister_kprobe(&jp->kp);
 }
 
+/*
+ * Wait for the page to be unlocked if someone else had locked it and map
+ * it. Returns 0 if mapped, 1 if there is no page or it is not uptodate.
+ */
+static int map_uprobe_page(struct uprobe *up, int mapped)
+{
+	/* nothing to be done, page is already mapped */
+	if (mapped)
+		return 0;
+	if (!up->page)
+		return 1;
+
+	wait_on_page_locked(up->page);
+	/* we could probably retry readpage here. */
+	if (!PageUptodate(up->page))
+		return 1;
+	up->kp.addr = (kprobe_opcode_t *) kmap_atomic(up->page, KM_USER0);
+	return 0;
+}
+
+/*
+ * Get the offset within the mapped page then register kprobe.
+ */
+static int insert_probe_page(struct uprobe *up)
+{
+	up->kp.addr = (kprobe_opcode_t *) ((unsigned long)up->kp.addr +
+				 (unsigned long)(up->offset & ~PAGE_MASK));
+	up->kp.opcode = 0;
+	return register_kprobe(&up->kp);
+
+}
+
+static void unmap_uprobe_page(struct uprobe *up)
+{
+	kunmap_atomic(up->kp.addr, KM_USER0);
+}
+
+/*
+ * find_get_vma walks through the list of process private mappings and
+ * returns the pointer to vma containing the offset if found.
+ */
+static struct vm_area_struct *find_get_vma(unsigned long offset,
+					   struct address_space *mapping)
+{
+	struct vm_area_struct *vma = NULL;
+	struct prio_tree_iter iter;
+	struct prio_tree_root *head = &mapping->i_mmap;
+	struct mm_struct *mm;
+	unsigned long start, end;
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, head, offset, offset) {
+		mm = vma->vm_mm;
+		spin_lock(&mm->page_table_lock);
+		start = vma->vm_start - (vma->vm_pgoff << PAGE_SHIFT);
+		end = vma->vm_end - (vma->vm_pgoff << PAGE_SHIFT);
+		spin_unlock(&mm->page_table_lock);
+		if ((start + offset) < end) {
+			spin_unlock(&mapping->i_mmap_lock);
+			return vma;
+		}
+	}
+	spin_unlock(&mapping->i_mmap_lock);
+	return NULL;
+}
+
+/*
+ * Gets exclusive write access to the given inode to ensure that the file
+ * on which probes are currently applied does not change. Use the function,
+ * deny_write_access_to_inode() we added in fs/namei.c.
+ */
+static inline int ex_write_lock(struct inode *inode)
+{
+	return deny_write_access_to_inode(inode);
+}
+
+/*
+ * Called when removing user space probes to release the write lock on the
+ * inode.
+ */
+static inline int ex_write_unlock(struct inode *inode)
+{
+	atomic_inc(&inode->i_writecount);
+	return 0;
+}
+
+/*
+ * Walk the uprobe_module_list and return the uprobe module with matching
+ * inode.
+ */
+static struct uprobe_module *get_module_by_inode(struct inode *inode)
+{
+	struct uprobe_module *um;
+	list_for_each_entry(um, &uprobe_module_list, mlist) {
+		if (um->inode == inode)
+			return um;
+	}
+	return NULL;
+}
+
+/*
+ * Walk the path in p->name to get its inode and return the uprobe_module
+ * registered for that inode, or NULL if the lookup fails or none matches.
+ */
+static struct uprobe_module *get_module_by_name(struct uprobe *p)
+{
+	struct nameidata nd;
+	struct inode *inode;
+	char *temp = p->name;
+
+	if (path_lookup(temp, LOOKUP_FOLLOW, &nd)) {
+		/* a failed lookup leaves nothing in nd to release */
+		printk("Failed to lookup the path\n");
+		return NULL;
+	}
+	inode = nd.dentry->d_inode;
+	path_release(&nd);
+	return get_module_by_inode(inode);
+}
+
+/*
+ * Find or create the uprobe_module for up->name and link up into it.
+ * Returns the uprobe_module, or NULL on failure.
+ *
+ * A newly created module leaves with the dentry held and with the inode
+ * write lock taken, so the probed file cannot change from under us.
+ */
+static struct uprobe_module *get_inode_ops(struct uprobe *up)
+{
+	struct uprobe_module *um = NULL;
+
+	INIT_HLIST_NODE(&up->ulist);
+	um = get_module_by_name(up);
+	if (um) {
+		up->inode = um->inode;
+		hlist_add_head(&up->ulist, &um->ulist_head);
+		goto out;
+	}
+
+	um = kcalloc(1, sizeof(struct uprobe_module), GFP_KERNEL);
+	/* bail out on allocation failure before um->nd is touched */
+	if (!um || path_lookup(up->name, LOOKUP_FOLLOW, &um->nd))
+		goto err;
+
+	up->inode = um->nd.dentry->d_inode;
+
+	if (ex_write_lock(up->inode)) {
+		path_release(&um->nd);
+		goto err;
+	}
+	INIT_HLIST_HEAD(&um->ulist_head);
+	hlist_add_head(&up->ulist, &um->ulist_head);
+	list_add(&um->mlist, &uprobe_module_list);
+	um->inode = up->inode;
+	goto out;
+
+err:
+	kfree(um);
+	um = NULL;	/* never hand a freed pointer back to the caller */
+out:
+	return um;
+}
+/*
+ * physical insertion/removal of probes in the actual pages of the module.
+ * Register user space probes before actually inserting probes in the page for
+ * a given pair of inode and offset.
+ */
+int register_userspace_probe(struct uprobe *up)
+{
+	struct address_space *mapping;
+	struct uprobe_module *um;
+	int error = 0;
+
+	if (!(um = get_inode_ops(up))) {
+		printk("get_inode_opertion: %s returned error %d\n", up->name,
+		       error);
+		return -ENOSYS;
+	}
+
+	mapping = up->inode->i_mapping;
+	up->vma = find_get_vma(up->offset, mapping);
+	up->page = find_get_page(mapping, (up->offset >> PAGE_CACHE_SHIFT));
+
+	if (!map_uprobe_page(up, 0)) {
+		error = insert_probe_page(up);
+		unmap_uprobe_page(up);
+	}
+
+	if (up->page)
+		page_cache_release(up->page);
+
+	return error;
+}
+
+/*
+ * Physical removal of a probe from the actual page of the module.
+ * Unregisters the user space probe for the given pair of inode and offset
+ * and, when it was the last probe on that inode, drops the write lock and
+ * path reference and frees the uprobe_module.
+ */
+void unregister_userspace_probe(struct uprobe *up)
+{
+	struct address_space *mapping = up->inode->i_mapping;
+	struct uprobe_module *um;
+
+	up->vma = find_get_vma(up->offset, mapping);
+	up->page = find_get_page(mapping, up->offset >> PAGE_CACHE_SHIFT);
+
+	if (map_uprobe_page(up, 0)) {
+		printk("Probe unregister: failed\n");
+		goto out;
+	}
+	/* point kp.addr at the probed instruction inside the mapped page */
+	up->kp.addr = (kprobe_opcode_t *) ((unsigned long)up->kp.addr +
+				 (unsigned long)(up->offset & ~PAGE_MASK));
+	unregister_kprobe(&up->kp);
+	unmap_uprobe_page(up);
+	um = get_module_by_inode(up->inode);
+	hlist_del(&up->ulist);
+	if (um && hlist_empty(&um->ulist_head)) {
+		list_del(&um->mlist);
+		ex_write_unlock(up->inode);
+		path_release(&um->nd);	/* release path */
+		kfree(um);	/* allocated in get_inode_ops(), now unused */
+	}
+out:
+	if (up->page)
+		page_cache_release(up->page);
+}
+
 #ifdef ARCH_SUPPORTS_KRETPROBES
 
 int register_kretprobe(struct kretprobe *rp)
@@ -574,6 +854,8 @@ static int __init init_kprobes(void)
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
 	}
 
+	/* initialize uprobe_module_list */
+	INIT_LIST_HEAD(&uprobe_module_list);
 	err = arch_init_kprobes();
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
@@ -590,4 +872,5 @@ EXPORT_SYMBOL_GPL(unregister_jprobe);
 EXPORT_SYMBOL_GPL(jprobe_return);
 EXPORT_SYMBOL_GPL(register_kretprobe);
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
-
+EXPORT_SYMBOL_GPL(register_userspace_probe);
+EXPORT_SYMBOL_GPL(unregister_userspace_probe);
diff -puN include/linux/kprobes.h~kprobes_userspace_probes-newinterface include/linux/kprobes.h
--- linux-2.6.13/include/linux/kprobes.h~kprobes_userspace_probes-newinterface	2005-09-14 11:00:20.025402504 +0530
+++ linux-2.6.13-prasanna/include/linux/kprobes.h	2005-09-14 11:00:20.047399160 +0530
@@ -33,7 +33,10 @@
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
-
+#include <linux/mm.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
 #include <asm/kprobes.h>
 
 /* kprobe_status settings */
@@ -103,6 +106,24 @@ struct jprobe {
 	kprobe_opcode_t *entry;	/* probe handling code to jump to */
 };
 
+
+struct uprobe {
+	char name[50];
+	struct kprobe kp;
+	struct hlist_node ulist; /*list of probe per module */
+	struct inode *inode;
+	unsigned long offset;
+	struct page *page;
+	struct vm_area_struct *vma;
+};
+
+struct uprobe_module {
+	struct hlist_head ulist_head;
+	struct list_head mlist;
+	struct inode *inode;
+	struct nameidata nd; /* to hold path/dentry etc. */
+};
+
 #ifdef ARCH_SUPPORTS_KRETPROBES
 extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs);
 #else /* ARCH_SUPPORTS_KRETPROBES */
@@ -159,6 +180,8 @@ extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern kprobe_opcode_t *get_insn_slot(void);
 extern void free_insn_slot(kprobe_opcode_t *slot);
+extern void arch_arm_uprobe(struct uprobe *up);
+extern void arch_disarm_uprobe(struct uprobe *up);
 
 /* Get the kprobe at this addr (if any).  Must have called lock_kprobes */
 struct kprobe *get_kprobe(void *addr);
diff -puN arch/i386/kernel/kprobes.c~kprobes_userspace_probes-newinterface arch/i386/kernel/kprobes.c
--- linux-2.6.13/arch/i386/kernel/kprobes.c~kprobes_userspace_probes-newinterface	2005-09-14 11:00:20.029401896 +0530
+++ linux-2.6.13-prasanna/arch/i386/kernel/kprobes.c	2005-09-14 11:00:20.048399008 +0530
@@ -87,6 +87,20 @@ void arch_disarm_kprobe(struct kprobe *p
 			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 }
 
+void arch_arm_uprobe(struct uprobe *up)
+{
+	*up->kp.addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_user_range(up->vma, up->page,
+			(unsigned long) up->kp.addr, sizeof(kprobe_opcode_t));
+}
+
+void arch_disarm_uprobe(struct uprobe *up)
+{
+	*up->kp.addr = up->kp.opcode;
+	flush_icache_user_range(up->vma, up->page,
+			(unsigned long) up->kp.addr, sizeof(kprobe_opcode_t));
+}
+
 void arch_remove_kprobe(struct kprobe *p)
 {
 }
diff -puN include/linux/namei.h~kprobes_userspace_probes-newinterface include/linux/namei.h
--- linux-2.6.13/include/linux/namei.h~kprobes_userspace_probes-newinterface	2005-09-14 11:00:20.033401288 +0530
+++ linux-2.6.13-prasanna/include/linux/namei.h	2005-09-14 11:00:20.048399008 +0530
@@ -73,6 +73,7 @@ extern int follow_up(struct vfsmount **,
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
+extern int deny_write_access_to_inode(struct inode *inode);
 
 static inline void nd_set_link(struct nameidata *nd, char *path)
 {
diff -puN fs/namei.c~kprobes_userspace_probes-newinterface fs/namei.c
--- linux-2.6.13/fs/namei.c~kprobes_userspace_probes-newinterface	2005-09-14 11:00:20.037400680 +0530
+++ linux-2.6.13-prasanna/fs/namei.c	2005-09-14 11:00:20.052398400 +0530
@@ -286,6 +286,18 @@ int get_write_access(struct inode * inod
 	return 0;
 }
 
+int deny_write_access_to_inode(struct inode * inode)
+{
+	spin_lock(&inode->i_lock);
+	if (atomic_read(&inode->i_writecount) > 0) {
+		spin_unlock(&inode->i_lock);
+		return -ETXTBSY;
+	}
+	atomic_dec(&inode->i_writecount);
+	spin_unlock(&inode->i_lock);
+	return 0;
+}
+
 int deny_write_access(struct file * file)
 {
 	struct inode *inode = file->f_dentry->d_inode;

_
-- 

Prasanna S Panchamukhi
Linux Technology Center
India Software Labs, IBM Bangalore
Ph: 91-80-25044636
<prasanna@in.ibm.com>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]