Re: [Bug kprobes/2064] New: Support pagepoint probes
- From: Marcelo Tosatti <marcelo dot tosatti at cyclades dot com>
- To: jkenisto at us dot ibm dot com <sourceware-bugzilla at sourceware dot org>
- Cc: systemtap at sources dot redhat dot com
- Date: Sat, 17 Dec 2005 19:18:23 -0200
- Subject: Re: [Bug kprobes/2064] New: Support pagepoint probes
- References: <20051216211044.2064.jkenisto@us.ibm.com>
Hi jkenisto,
On Fri, Dec 16, 2005 at 09:10:45PM -0000, jkenisto at us dot ibm dot com wrote:
> It has been requested that we support "pagepoint" probes. Such a probe is
> analogous to a watchpoint probe; but with a pagepoint probe, neither the number
> of concurrent probes nor the size of the probed area is limited by the CPU's
> debug-register architecture.
And I suppose these probes need to work on physical addresses as well as on
virtual page addresses of particular processes?
> Pagepoint probes would presumably be implemented by fussing the permission bits
> on the probed page(s) and hooking the page-fault handler (or exploiting the
> existing kprobes hook).
I encountered a similar requirement for a project to track page accesses
(on a per-virtual-mapping basis, not physical addresses), and it was
solved by:
- disabling the PRESENT bit of the page table entry in question
- setting a "PAGE_DISABLED" bit (using a free bit in the pte flags)
- hooking the page-fault handler to identify disabled PTEs, reinstate them
immediately, and call my private accounting function.
The plan is to convert the hook to SystemTap if possible; a sketch of the
arming side follows, before the patch.
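For reference, here is a minimal sketch of how one such page could be armed,
using the pte_presprotect()/pte_disable() helpers introduced by the patch
below. pagepoint_arm() is a hypothetical name used only for illustration (it
is not part of the patch), and locking/error handling is reduced to the
essentials:

#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/* Hypothetical helper: clear PRESENT on one user page and mark it
 * _PAGE_DISABLED so the page-fault hook can account the next access.
 * Caller must hold mm->mmap_sem for read. */
static int pagepoint_arm(struct vm_area_struct *vma, unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, entry;
	spinlock_t *ptl;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return -EFAULT;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || pud_bad(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return -EFAULT;

	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
	entry = *pte;
	if (!pte_present(entry)) {
		pte_unmap_unlock(pte, ptl);
		return -EFAULT;
	}
	/* Swap PRESENT for the private "disabled" flag and flush the TLB,
	 * so the next access to this page faults into handle_pte_fault(). */
	set_pte_at(mm, address, pte, pte_disable(pte_presprotect(entry)));
	flush_tlb_page(vma, address);
	pte_unmap_unlock(pte, ptl);
	return 0;
}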
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 088a945..db8c3f7 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -116,7 +116,7 @@ void paging_init(void);
#define _PAGE_DIRTY 0x040
#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */
#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */
-#define _PAGE_UNUSED1 0x200 /* available for programmer */
+#define _PAGE_DISABLED 0x200 /* for pagetrace */
#define _PAGE_UNUSED2 0x400
#define _PAGE_UNUSED3 0x800
@@ -225,6 +225,7 @@ static inline int pte_read(pte_t pte) {
static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
+static inline int pte_disabled(pte_t pte) { return (pte).pte_low & _PAGE_DISABLED; }
static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
/*
@@ -237,11 +238,15 @@ static inline pte_t pte_exprotect(pte_t
static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_presprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_PRESENT; return pte; }
+static inline pte_t pte_enable(pte_t pte) { (pte).pte_low &= ~_PAGE_DISABLED; return pte; }
static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkpresent(pte_t pte) { (pte).pte_low |= _PAGE_PRESENT; return pte; }
+static inline pte_t pte_disable(pte_t pte) { (pte).pte_low |= _PAGE_DISABLED; return pte; }
static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; }
#ifdef CONFIG_X86_PAE
diff --git a/mm/memory.c b/mm/memory.c
index 4b4fc3a..b57e808 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -418,7 +418,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
struct page *page;
/* pte contains position in swap or file, so copy. */
- if (unlikely(!pte_present(pte))) {
+ if (unlikely(!pte_present(pte)) && !pte_disabled(pte)) {
if (!pte_file(pte)) {
swap_duplicate(pte_to_swp_entry(pte));
/* make sure dst_mm is on swapoff's mmlist. */
@@ -606,7 +606,7 @@ static unsigned long zap_pte_range(struc
(*zap_work)--;
continue;
}
- if (pte_present(ptent)) {
+ if (pte_present(ptent) || pte_disabled(ptent)) {
struct page *page;
(*zap_work) -= PAGE_SIZE;
@@ -908,7 +908,7 @@ struct page *follow_page(struct vm_area_
goto out;
pte = *ptep;
- if (!pte_present(pte))
+ if (!pte_present(pte) && !pte_disabled(pte))
goto unlock;
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;
@@ -2199,6 +2199,18 @@ static inline int handle_pte_fault(struc
old_entry = entry = *pte;
if (!pte_present(entry)) {
+ if (pte_disabled(entry)) {
+ ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
+ if (unlikely(!pte_same(*pte, entry)))
+ goto unlock;
+ pgtrace(vma, 0, address);
+ set_pte_at(vma->vm_mm, address, pte, pte_enable(pte_mkpresent(entry)));
+ flush_tlb_page(vma, address);
+ update_mmu_cache(vma, address, entry);
+ pte_unmap(pte);
+ goto unlock;
+ }
if (pte_none(entry)) {
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, address,
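The pgtrace() call in the handle_pte_fault() hunk above is my private
accounting function and is not included in this excerpt. Purely as an
illustration (the signature is inferred from the call site and the body is a
placeholder), it could be as simple as:

#include <linux/kernel.h>
#include <linux/mm.h>

/* Illustrative stub only: real per-mapping accounting would go here.
 * This is also the natural place to attach a kprobe/SystemTap probe
 * later, instead of hard-coding a printk. */
void pgtrace(struct vm_area_struct *vma, int write, unsigned long address)
{
	printk(KERN_DEBUG "pgtrace: mm=%p addr=%#lx write=%d\n",
	       vma->vm_mm, address, write);
}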
>
> --
> Summary: Support pagepoint probes
> Product: systemtap
> Version: unspecified
> Status: NEW
> Severity: enhancement
> Priority: P3
> Component: kprobes
> AssignedTo: systemtap at sources dot redhat dot com
> ReportedBy: jkenisto at us dot ibm dot com
>
>
> http://sourceware.org/bugzilla/show_bug.cgi?id=2064
>