This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
[RFC PATCH 2/3] Djprobe improvement patches (Re: Dynamic djprobe)
- From: Masami Hiramatsu <hiramatu at sdl dot hitachi dot co dot jp>
- To: Masami Hiramatsu <hiramatu at sdl dot hitachi dot co dot jp>
- Cc: systemtap at sources dot redhat dot com,"Keshavamurthy, Anil S" <anil dot s dot keshavamurthy at intel dot com>,Roland McGrath <roland at redhat dot com>,Richard J Moore <richardj_moore at uk dot ibm dot com>, Andi Kleen <ak at muc dot de>,michel dot dagenais at polymtl dot ca,Mathieu Desnoyers <compudj at krystal dot dyndns dot org>,"Frank Ch. Eigler" <fche at redhat dot com>,Karim Yaghmour <karim at opersys dot com>,Satoshi Oshima <soshima at redhat dot com>, Hideo Aoki <haoki at redhat dot com>,Yumiko Sugita <sugita at sdl dot hitachi dot co dot jp>
- Date: Wed, 03 Aug 2005 02:07:38 +0900
- Subject: [RFC PATCH 2/3] Djprobe improvement patches (Re: Dynamic djprobe)
- References: <42EA740A.10601@sdl.hitachi.co.jp>
Hi,
This patch is for finding the executing address hidden by nested
interrupts. I introduced an address_stack structure, and
push/pop_iaddr() macros. push_iaddr() stores interrupted executing
address into address_stack percpu stacks and increments a counter.
pop_iaddr() just decrements the counter. We can find hidden addresses
from address_stack and check it.
But Satoshi pointed out the possibility of performance degradation. So I
am developing another patch to find hidden address from current thread’s
stack.
Thanks,
--
Masami HIRAMATSU
2nd Research Dept.
Hitachi, Ltd., Systems Development Laboratory
E-mail: hiramatu@sdl.hitachi.co.jp
arch/i386/kernel/apic.c | 7 +++++++
arch/i386/kernel/cpu/mcheck/p4.c | 3 +++
arch/i386/kernel/irq.c | 3 +++
arch/i386/kernel/kprobes.c | 2 +-
arch/i386/kernel/smp.c | 3 +++
arch/i386/kernel/traps.c | 4 +++-
include/linux/kprobes.h | 20 ++++++++++++++++++++
kernel/kprobes.c | 22 ++++++++++++++++++++++
8 files changed, 62 insertions(+), 2 deletions(-)
diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/apic.c linux-2.6.12-djprobe.3/arch/i386/kernel/apic.c
--- linux-2.6.12-djprobe.2/arch/i386/kernel/apic.c 2005-06-18 04:48:29.000000000 +0900
+++ linux-2.6.12-djprobe.3/arch/i386/kernel/apic.c 2005-08-02 20:30:10.000000000 +0900
@@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
+#include <linux/kprobes.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -1178,7 +1179,9 @@ fastcall void smp_apic_timer_interrupt(s
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
+ push_iaddr((void*)regs->eip);
smp_local_timer_interrupt(regs);
+ pop_iaddr();
irq_exit();
}
@@ -1190,6 +1193,7 @@ fastcall void smp_spurious_interrupt(str
unsigned long v;
irq_enter();
+ push_iaddr((void*)regs->eip);
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1202,6 +1206,7 @@ fastcall void smp_spurious_interrupt(str
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
smp_processor_id());
+ pop_iaddr();
irq_exit();
}
@@ -1214,6 +1219,7 @@ fastcall void smp_error_interrupt(struct
unsigned long v, v1;
irq_enter();
+ push_iaddr((void*)regs->eip);
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
@@ -1233,6 +1239,7 @@ fastcall void smp_error_interrupt(struct
*/
printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
smp_processor_id(), v , v1);
+ pop_iaddr();
irq_exit();
}
diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/cpu/mcheck/p4.c linux-2.6.12-djprobe.3/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.12-djprobe.2/arch/i386/kernel/cpu/mcheck/p4.c 2005-06-18 04:48:29.000000000 +0900
+++ linux-2.6.12-djprobe.3/arch/i386/kernel/cpu/mcheck/p4.c 2005-08-02 20:30:10.000000000 +0900
@@ -9,6 +9,7 @@
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
+#include <linux/kprobes.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -73,7 +74,9 @@ static void (*vendor_thermal_interrupt)(
fastcall void smp_thermal_interrupt(struct pt_regs *regs)
{
irq_enter();
+ push_iaddr((void*)regs->eip);
vendor_thermal_interrupt(regs);
+ pop_iaddr();
irq_exit();
}
diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/irq.c linux-2.6.12-djprobe.3/arch/i386/kernel/irq.c
--- linux-2.6.12-djprobe.2/arch/i386/kernel/irq.c 2005-06-18 04:48:29.000000000 +0900
+++ linux-2.6.12-djprobe.3/arch/i386/kernel/irq.c 2005-08-02 20:30:10.000000000 +0900
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/kprobes.h>
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -58,6 +59,7 @@ fastcall unsigned int do_IRQ(struct pt_r
#endif
irq_enter();
+ push_iaddr((void*)regs->eip);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
@@ -104,6 +106,7 @@ fastcall unsigned int do_IRQ(struct pt_r
#endif
__do_IRQ(irq, regs);
+ pop_iaddr();
irq_exit();
return 1;
diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/kprobes.c linux-2.6.12-djprobe.3/arch/i386/kernel/kprobes.c
--- linux-2.6.12-djprobe.2/arch/i386/kernel/kprobes.c 2005-08-01 22:00:17.000000000 +0900
+++ linux-2.6.12-djprobe.3/arch/i386/kernel/kprobes.c 2005-08-02 20:30:10.000000000 +0900
@@ -497,7 +497,7 @@ int djprobe_bypass_handler(struct kprobe
kprobe_opcode_t *stub = djpi->stub.insn;
int cpu = smp_processor_id();
- if (!DJPI_CHECKED(djpi)) {
+ if (!DJPI_CHECKED(djpi) && check_safety_djprobe_instance(djpi)) {
cpu_set(cpu, djpi->checked_cpus); /* check this cpu */
if (DJPI_CHECKED(djpi)) { /* all cpus are checked */
diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/smp.c linux-2.6.12-djprobe.3/arch/i386/kernel/smp.c
--- linux-2.6.12-djprobe.2/arch/i386/kernel/smp.c 2005-06-18 04:48:29.000000000 +0900
+++ linux-2.6.12-djprobe.3/arch/i386/kernel/smp.c 2005-08-02 20:30:10.000000000 +0900
@@ -19,6 +19,7 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
+#include <linux/kprobes.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
@@ -601,7 +602,9 @@ fastcall void smp_call_function_interrup
* At this point the info structure may be out of scope unless wait==1
*/
irq_enter();
+ push_iaddr((void*)regs->eip);
(*func)(info);
+ pop_iaddr();
irq_exit();
if (wait) {
diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/traps.c linux-2.6.12-djprobe.3/arch/i386/kernel/traps.c
--- linux-2.6.12-djprobe.2/arch/i386/kernel/traps.c 2005-06-18 04:48:29.000000000 +0900
+++ linux-2.6.12-djprobe.3/arch/i386/kernel/traps.c 2005-08-02 20:30:10.000000000 +0900
@@ -616,12 +616,13 @@ static int dummy_nmi_callback(struct pt_
}
static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
+
fastcall void do_nmi(struct pt_regs * regs, long error_code)
{
int cpu;
nmi_enter();
+ push_iaddr((void *)regs->eip);
cpu = smp_processor_id();
++nmi_count(cpu);
@@ -629,6 +630,7 @@ fastcall void do_nmi(struct pt_regs * re
if (!nmi_callback(regs, cpu))
default_do_nmi(regs);
+ pop_iaddr();
nmi_exit();
}
diff -Narup linux-2.6.12-djprobe.2/include/linux/kprobes.h linux-2.6.12-djprobe.3/include/linux/kprobes.h
--- linux-2.6.12-djprobe.2/include/linux/kprobes.h 2005-08-02 20:29:44.000000000 +0900
+++ linux-2.6.12-djprobe.3/include/linux/kprobes.h 2005-08-02 20:30:10.000000000 +0900
@@ -33,6 +33,7 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <asm/kprobes.h>
+#include <linux/irq.h>
struct kprobe;
struct pt_regs;
@@ -121,6 +122,24 @@ struct djprobe {
struct djprobe_instance * inst;
};
+#ifdef CONFIG_DJPROBE
+# define ADDR_STACK_SIZE NR_IRQS
+struct address_stack {
+ int sp;
+ void * addrs[ADDR_STACK_SIZE];
+};
+DECLARE_PER_CPU(struct address_stack, iadr_stack);
+/* push/pop interrupted address (should be called from non-preempt area)*/
+# define push_iaddr(addr) { int sp=__get_cpu_var(iadr_stack).sp++; \
+ if (sp < ADDR_STACK_SIZE && sp >= 0) \
+ __get_cpu_var(iadr_stack).addrs[sp]=(addr);}
+# define pop_iaddr() { int sp=__get_cpu_var(iadr_stack).sp--; \
+ if (sp <= 0) __get_cpu_var(iadr_stack).sp=0;}
+#else /*!DJPROBE*/
+# define push_iaddr(addr) do { } while (0)
+# define pop_iaddr() do { } while (0)
+#endif
+
#ifdef CONFIG_KPROBES
/* Locks kprobe: irq must be disabled */
void lock_kprobes(void);
@@ -155,6 +174,7 @@ extern int arch_prepare_djprobe_instance
extern int djprobe_bypass_handler(struct kprobe * kp, struct pt_regs * regs);
extern void arch_uninstall_djprobe_instance(struct djprobe_instance *djpi);
extern void schdule_release_djprobe_instance(void);
+extern int check_safety_djprobe_instance(struct djprobe_instance *djpi);
int register_djprobe(struct djprobe *p);
void unregister_djprobe(struct djprobe *p);
diff -Narup linux-2.6.12-djprobe.2/kernel/kprobes.c linux-2.6.12-djprobe.3/kernel/kprobes.c
--- linux-2.6.12-djprobe.2/kernel/kprobes.c 2005-08-01 22:00:17.000000000 +0900
+++ linux-2.6.12-djprobe.3/kernel/kprobes.c 2005-08-02 20:30:10.000000000 +0900
@@ -265,10 +265,32 @@ void unregister_jprobe(struct jprobe *jp
* This list is operated on registering and unregistering djprobe.
* Thus, It is not required processing speed. I decided using a 'list'.
*/
+DEFINE_PER_CPU(struct address_stack, iadr_stack);
static DEFINE_SPINLOCK(djprobe_lock);
static LIST_HEAD(djprobe_list);
static int nr_instances = 0;
+/* safety check routine */
+int check_safety_djprobe_instance(struct djprobe_instance *djpi)
+{
+ int sp;
+ preempt_disable();
+ sp = __get_cpu_var(iadr_stack).sp;
+ if (sp > ADDR_STACK_SIZE ){
+ return 0; /*overflowed -- we can't check it*/
+ }
+
+ for (; sp > 0; sp--) {
+ void * addr = __get_cpu_var(iadr_stack).addrs[sp];
+ if((void*)djpi->kp.addr < addr &&
+ (void*)djpi->kp.addr + djpi->stub.size > addr)
+ return 0; /*we find insertion address
+ in interrupted addresses*/
+ }
+ preempt_enable();
+ return 1;
+}
+
static void work_free_djprobe_instances(void *data)
{
struct list_head *pos;