This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: [PATCH]kprobe booster for IA64
- From: Masami Hiramatsu <hiramatu at sdl dot hitachi dot co dot jp>
- To: "bibo,mao" <bibo dot mao at intel dot com>
- Cc: "Keshavamurthy, Anil S" <anil dot s dot keshavamurthy at intel dot com>, Ananth N Mavinakayanahalli <ananth at in dot ibm dot com>, SystemTAP <systemtap at sources dot redhat dot com>, Yumiko Sugita <sugita at sdl dot hitachi dot co dot jp>, Satoshi Oshima <soshima at redhat dot com>, Hideo Aoki <haoki at redhat dot com>, Prasanna S Panchamukhi <prasanna at in dot ibm dot com>, Jim Keniston <jkenisto at us dot ibm dot com>
- Date: Thu, 15 Jun 2006 14:46:21 +0900
- Subject: Re: [PATCH]kprobe booster for IA64
- References: <4485223C.8090109@sdl.hitachi.co.jp> <44868B03.70108@intel.com> <448834B6.1010002@sdl.hitachi.co.jp> <4488E275.1020304@intel.com>
Hi, bibo
bibo,mao wrote:
> That's good for me. BTW in IA64 one bundle has three instructions, I think
> that this bundle should be judged whether it is within exception table.
I updated the kprobe-booster against 2.6.17-rc6-mm2 attached
to this mail. In this patch, I checked the extable in
can_boost() function by using search_exception_tables().
Unfortunately, the search_exception_tables() in IA64 seems corrupted.
As far as I can see, it doesn't work correctly, because the lookup
routine expects that the address format of the exception_table_entry
is "IP + slot", but the compiler (gcc-3.4.5) generates it as
"IP + (slot << 2)". Thus the lookup routine always fails to find the
corresponding entry.
I already made a patch to fix this bug, and will send it to Tony Luck,
Linux-IA64 maintainer, as soon as possible.
Thanks,
--
Masami HIRAMATSU
2nd Research Dept.
Hitachi, Ltd., Systems Development Laboratory
E-mail: hiramatu@sdl.hitachi.co.jp
arch/ia64/kernel/kprobes.c | 90 ++++++++++++++++++++++++++++++++++++++++++---
include/asm-ia64/kprobes.h | 9 +++-
2 files changed, 91 insertions(+), 8 deletions(-)
diff --exclude=CVS -Narup a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
--- a/arch/ia64/kernel/kprobes.c 2006-06-13 15:04:18.000000000 +0900
+++ b/arch/ia64/kernel/kprobes.c 2006-06-14 16:15:35.000000000 +0900
@@ -78,6 +78,41 @@ static enum instruction_type bundle_enco
};
/*
+ * In this function, we check whether the target bundle is possible
+ * to modify IP and whether it may occur an exception.
+ */
+static __always_inline int can_boost(uint template, unsigned long addr,
+ uint slot)
+{
+ addr &= ~0xFULL;
+ do {
+ if (search_exception_tables(addr + (++slot))) /* we must check the next slot. */
+ return 0; /* exception may occur in this bundle*/
+ } while (slot < 3);
+ template &= 0x1e;
+ if (template >= 0x10 || /* including B unit */
+ template == 0x04 || /* including X unit */
+ template == 0x06) { /* undefined */
+ return 0;
+ }
+ return 1;
+}
+
+/* Insert a long branch code */
+static __always_inline void set_brl_inst(void *from, void *to)
+{
+ s64 rel = ((s64) to - (s64) from) >> 4;
+ bundle_t *brl;
+ brl = (bundle_t *) ((u64) from & ~0xf);
+ brl->quad0.template = 0x05; /* [MLX](stop) */
+ brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */
+ brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+ brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+ /* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+ brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
+/*
* In this function we check to see if the instruction
* is IP relative instruction and update the kprobe
* inst flag accordingly
@@ -125,6 +160,10 @@ static void __kprobes update_kprobe_inst
break;
}
}
+
+ if (can_boost(template, (unsigned long)p->addr, slot)) {
+ p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+ }
return;
}
@@ -218,7 +257,7 @@ static void __kprobes prepare_break_inst
struct kprobe *p)
{
unsigned long break_inst = BREAK_INST;
- bundle_t *bundle = &p->ainsn.insn.bundle;
+ bundle_t *bundle = &p->ainsn.insn[0].bundle;
/*
* Copy the original kprobe_inst qualifying predicate(qp)
@@ -249,6 +288,14 @@ static void __kprobes prepare_break_inst
* single step on original instruction
*/
update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p);
+
+ /* If the bundle can be boosted, prepare boost bundles */
+ if (p->ainsn.inst_flag & INST_FLAG_BOOSTABLE) {
+ memcpy(&p->ainsn.insn[1].bundle, &p->opcode.bundle,
+ sizeof(bundle_t));
+ set_brl_inst(&p->ainsn.insn[2].bundle,
+ (bundle_t *) p->addr + 1);
+ }
}
static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot,
@@ -424,10 +471,10 @@ int __kprobes arch_prepare_kprobe(struct
unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
unsigned long kprobe_inst=0;
unsigned int slot = addr & 0xf, template, major_opcode = 0;
- bundle_t *bundle = &p->ainsn.insn.bundle;
+ bundle_t *bundle = &p->ainsn.insn[0].bundle;
memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t));
- memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t));
+ memcpy(&p->ainsn.insn[0].bundle, kprobe_addr, sizeof(bundle_t));
template = bundle->quad0.template;
@@ -454,7 +501,7 @@ void __kprobes arch_arm_kprobe(struct kp
unsigned long addr = (unsigned long)p->addr;
unsigned long arm_addr = addr & ~0xFULL;
- memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t));
+ memcpy((char *)arm_addr, &p->ainsn.insn[0].bundle, sizeof(bundle_t));
flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
}
@@ -471,7 +518,7 @@ void __kprobes arch_disarm_kprobe(struct
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
+ * located in the kprobe (p->ainsn.insn[0].bundle). We still need to adjust
* the ip to point back to the original stack address. To set the IP address
* to original stack address, handle the case where we need to fixup the
* relative IP address and/or fixup branch register.
@@ -488,7 +535,7 @@ static void __kprobes resume_execution(s
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
- if (p->ainsn.inst_flag) {
+ if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
@@ -563,6 +610,24 @@ static void __kprobes prepare_ss(struct
ia64_psr(regs)->ss = 1;
}
+/* prepare to execute directly */
+static void __kprobes prepare_boost(struct kprobe *p, struct pt_regs *regs)
+{
+ unsigned long slot = (unsigned long)p->addr & 0xf;
+
+ regs->cr_iip = (unsigned long)&p->ainsn.insn[1].bundle & ~0xFULL;
+
+ if (slot > 2)
+ slot = 0;
+
+ ia64_psr(regs)->ri = slot;
+
+ /* turn off single stepping */
+ ia64_psr(regs)->ss = 0;
+
+ reset_current_kprobe();
+}
+
static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
{
unsigned int slot = ia64_psr(regs)->ri;
@@ -602,6 +667,11 @@ static int __kprobes pre_kprobes_handler
struct pt_regs *regs = args->regs;
kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
struct kprobe_ctlblk *kcb;
+#ifdef CONFIG_PREEMPT
+ unsigned pre_preempt_count = preempt_count();
+#else
+ unsigned pre_preempt_count = 1;
+#endif
/*
* We don't want to be preempted for the entire
@@ -681,6 +751,14 @@ static int __kprobes pre_kprobes_handler
*/
return 1;
+ if (pre_preempt_count && p->ainsn.inst_flag == INST_FLAG_BOOSTABLE &&
+ !p->post_handler) {
+ /* Boost up -- we can execute copied instructions directly */
+ prepare_boost(p, regs);
+ preempt_enable_no_resched();
+ return 1;
+ }
+
ss_probe:
prepare_ss(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
diff --exclude=CVS -Narup a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
--- a/include/asm-ia64/kprobes.h 2006-06-13 15:04:36.000000000 +0900
+++ b/include/asm-ia64/kprobes.h 2006-06-14 10:12:49.000000000 +0900
@@ -29,8 +29,12 @@
#include <linux/percpu.h>
#include <asm/break.h>
-#define MAX_INSN_SIZE 16
+#define MAX_INSN_SIZE 3 /* 3 bundles */
#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6)
+#define NOP_M_INST (long)(1<<27)
+#define BRL_INST(i1,i2) (long)((0xcL << 37) | /* brl */ \
+ (1L << 12) | /* many */ \
+ (((i1) & 1) << 36) | ((i2) << 13)) /* imm */
typedef union cmp_inst {
struct {
@@ -108,10 +112,11 @@ struct fnptr {
/* Architecture specific copy of original instruction*/
struct arch_specific_insn {
/* copy of the instruction to be emulated */
- kprobe_opcode_t insn;
+ kprobe_opcode_t insn[3];
#define INST_FLAG_FIX_RELATIVE_IP_ADDR 1
#define INST_FLAG_FIX_BRANCH_REG 2
#define INST_FLAG_BREAK_INST 4
+ #define INST_FLAG_BOOSTABLE 8
unsigned long inst_flag;
unsigned short target_br_reg;
};