This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RE: Patch [3/3] Userspace probes single stepping out-of-line


>>-----Original Message-----
>>From: systemtap-owner@sourceware.org [mailto:systemtap-owner@sourceware.org] On Behalf Of Prasanna S Panchamukhi
>>Sent: 2006年1月19日 22:41
>>To: systemtap@sources.redhat.com
>>Subject: Re: Patch [3/3] Userspace probes single stepping out-of-line
>>
>>
>>This patch handles the executing the registered callback
>>functions when probes is hit.
>>stepping or the expanded stack should be reused for single stepping
>>out-of-line for other probes.
>>
>>5. A wrapper routines to calculate the offset from the probed file
>>beginning. In case of dynamic shared library, the offset is
>>calculated by substracting the address of the probe point from the
>>beginning of the file mapped address.
>>
>>6. Handing of page faults while inthe kprobes_handler() and while
>>single stepping.
>>
>>7. Accessing user space pages not present in memory, from the
>>registered callback routines.
The patch uses the page_addr aligned with stack pointer to store instructions for single step.
It doesn't consider scenarios of multi-thread process. For example, 1 process has 10 threads
and every thread has an 8kb stack. All the stacks share the same vma. Just near the end of
the first 4kb page, threads might try to extend the same vma at the same time while every
thread still has a stack page available. I suggest to use stack_addr - sizeof(long long) - size,
if the result is bigger than vma->vm_start  at VM_GROWDOWN case.


>>
>>Signed-off-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>
>>
>>
>> arch/i386/kernel/kprobes.c |  352 +++++++++++++++++++++++++++++++++++++++++++--
>> include/asm-i386/kprobes.h |    9 +
>> include/linux/kprobes.h    |    3
>> 3 files changed, 351 insertions(+), 13 deletions(-)
>>
>>diff -puN arch/i386/kernel/kprobes.c~kprobes_userspace_probes_ss_out-of-line arch/i386/kernel/kprobes.c
>>--- linux-2.6.15/arch/i386/kernel/kprobes.c~kprobes_userspace_probes_ss_out-of-line	2006-01-19 19:37:53.000000000 +0530
>>+++ linux-2.6.15-prasanna/arch/i386/kernel/kprobes.c	2006-01-19 19:37:53.000000000 +0530
>>@@ -38,9 +38,11 @@
>>
>> void jprobe_return_end(void);
>>
>>+static struct uprobe_page *uprobe_page;
>> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
>> DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
>>-
>>+DEFINE_PER_CPU(struct uprobe *, current_uprobe) = NULL;
>>+DEFINE_PER_CPU(unsigned long, singlestep_addr);
>> /*
>>  * returns non-zero if opcode modifies the interrupt flag.
>>  */
>>@@ -77,6 +79,23 @@ void __kprobes arch_disarm_kprobe(struct
>> 			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
>> }
>>
>>+void __kprobes arch_disarm_uprobe(struct kprobe *p, kprobe_opcode_t *address)
>>+{
>>+	*address = p->opcode;
>>+}
>>+
>>+void __kprobes arch_arm_uprobe(unsigned long *address)
>>+{
>>+	*(kprobe_opcode_t *)address = BREAKPOINT_INSTRUCTION;
>>+}
>>+
>>+void __kprobes arch_copy_uprobe(struct kprobe *p, unsigned long *address)
>>+{
>>+	memcpy(p->ainsn.insn, (kprobe_opcode_t *)address,
>>+				MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
>>+	p->opcode = *(kprobe_opcode_t *)address;
>>+}
>>+
>> static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
>> {
>> 	kcb->prev_kprobe.kp = kprobe_running();
>>@@ -103,15 +122,223 @@ static inline void set_current_kprobe(st
>> 		kcb->kprobe_saved_eflags &= ~IF_MASK;
>> }
>>
>>+/**
>>+ * This routines get the pte of the page containing the specified address.
>>+ */
>>+static pte_t  __kprobes *get_uprobe_pte(unsigned long address)
>>+{
>>+	pgd_t *pgd;
>>+	pud_t *pud;
>>+	pmd_t *pmd;
>>+	pte_t *pte = NULL;
>>+
>>+	pgd = pgd_offset(current->mm, address);
>>+	if (!pgd)
>>+		goto out;
>>+
>>+	pud = pud_offset(pgd, address);
>>+	if (!pud)
>>+		goto out;
>>+
>>+	pmd = pmd_offset(pud, address);
>>+	if (!pmd)
>>+		goto out;
>>+
>>+	pte = pte_alloc_map(current->mm, pmd, address);
>>+
>>+out:
>>+	return pte;
>>+}
>>+
>>+/**
>>+ * This routine expands the stack beyond the present process address space
>>+ * and copies the instruction to that location, so that processor can
>>+ * single step out-of-line.
>>+ */
>>+static int __kprobes copy_insn_onexpstack(struct uprobe *uprobe ,
>>+			struct pt_regs *regs, struct vm_area_struct *vma)
>>+{
>>+	unsigned long addr, *vaddr, vm_addr;
>>+	int err = 0, size = MAX_INSN_SIZE * sizeof(kprobe_opcode_t);
>>+	struct vm_area_struct *new_vma;
>>+	struct uprobe_page *upage;
>>+	struct mm_struct *mm = current->mm;
>>+	struct page *page;
>>+	pte_t *pte;
>>+
>>+	upage = per_cpu_ptr(uprobe_page, smp_processor_id());
Upage is per cpu data. If just jumping back to user space to execute the single step,
device interrupt might disrupt the current thread, then current thread might be schedule
to another processor. When the thread is woken up again, the upage is not the original.
How about changing it to task_struct related?


>>+
>>+	if (vma->vm_flags & VM_GROWSDOWN)
>>+		vm_addr = vma->vm_start - size;
>>+	else if (vma->vm_flags & VM_GROWSUP)
>>+		vm_addr = vma->vm_end + size;
>>+	else
>>+		return -EFAULT;
>>+
>>+	spin_lock(&mm->page_table_lock);
>>+
>>+	/* TODO: do we need to expand stack if extend_vma fails? */
>>+	if (!(new_vma = find_extend_vma(mm, vm_addr)))
>>+		err = expand_stack(new_vma, vm_addr);
If new_vma==NULL, panic?


>>+
>>+	if (err) {
>>+		spin_unlock(&mm->page_table_lock);
>>+		return err;
>>+	}
>>+
>>+	spin_unlock(&mm->page_table_lock);
>>+
>>+	/*
>>+	 * TODO: Expanding stack for every probe is not a good idea, stack must
>>+	 * either be shrunk to its original size after single stepping or the
>>+	 * expanded stack should be kept track of, for the probed application,
>>+	 * so it can be reused to single step out-of-line
>>+	 */
>>+	if (new_vma->vm_flags & VM_GROWSDOWN)
>>+		addr = new_vma->vm_start;
>>+	else
>>+		addr = new_vma->vm_end - size;
>>+
>>+	pte = get_uprobe_pte(addr);
>>+	if (!pte)
>>+		return -EFAULT;
>>+
>>+	upage->orig_pte = pte;
>>+	upage->orig_pte_val =  pte_val(*pte);
>>+	set_pte(pte, (*(upage->alias_pte)));
>>+
>>+	page = pte_page(*pte);
>>+	vaddr = (unsigned long *)kmap_atomic(page, KM_USER1);
>>+	err = __copy_to_user_inatomic(vaddr,
>>+			(unsigned long *)uprobe->kp.ainsn.insn, size);
>>+	kunmap_atomic(vaddr, KM_USER1);
>>+	if (err)
>>+		return err;
>>+
>>+	regs->eip = addr;
>>+
>>+	return  0;
>>+}
>>+
>>+/**
>>+ * This routine checks for stack free space below the stack pointer and
>>+ * then copies the instructions at that location so that the processor can
>>+ * single step out-of-line. If there is no enough stack space or if
>>+ * copy_to_user fails or if the vma is invalid, it returns error.
>>+ */
>>+static int __kprobes copy_insn_onstack(struct uprobe *uprobe,
>>+			struct pt_regs *regs, struct vm_area_struct *vma)
>>+{
>>+	unsigned long page_addr, stack_addr = regs->esp;
>>+	int  size = MAX_INSN_SIZE * sizeof(kprobe_opcode_t);
>>+	unsigned long *source = (unsigned long *)uprobe->kp.ainsn.insn;
>>+
>>+	if (vma->vm_flags & VM_GROWSDOWN) {
>>+		page_addr = stack_addr & PAGE_MASK;
>>+
>>+		if (((stack_addr - sizeof(long long)) - page_addr) < size)
If stack_addr==page_addr, above is always false because they are unsigned long. Pls. change it to:
	if ((stack_addr - sizeof(long long)) < (page_addr + size))


>>+			return -ENOMEM;
>>+
>>+		if (__copy_to_user_inatomic((unsigned long *)(page_addr + size),
Should (page_addr + size) is just page_addr?


>>+								source, size))
>>+			return -EFAULT;
>>+
>>+		regs->eip = page_addr + size;
Same issue like before.


>>+	} else if (vma->vm_flags & VM_GROWSUP) {
>>+		page_addr = (stack_addr & PAGE_MASK) + PAGE_SIZE;
It's weird to get the page align by this approach. Not accurate.

>>+
>>+		if ((page_addr - (stack_addr + sizeof(long long))) < size)
Similar unsigned long calculation issue.


>>+			return -ENOMEM;
>>+
>>+		if (__copy_to_user_inatomic((unsigned long *)(page_addr - size),
>>+								source, size))
>>+			return -EFAULT;
>>+
>>+		regs->eip = page_addr - size;
>>+	} else
>>+		return -EINVAL;
>>+
>>+	return 0;
>>+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]