From 9eef42260030137a6922012b88d65e9b5e7ce26d Mon Sep 17 00:00:00 2001 From: David Smith Date: Wed, 14 Oct 2015 16:03:52 -0500 Subject: [PATCH] Standardize and fix mm's exe_file handling for kernels >= 4.1. * runtime/task_finder_vma.c (stap_find_exe_file): Rewrite stap_find_exe_file() to handle kernels >= 4.1. * runtime/linux/task_finder.c (__stp_get_mm_path): Call the new stap_find_exe_file(), which handles locking the mm. * runtime/linux/task_finder2.c (__stp_get_mm_path): Ditto. * runtime/linux/uprobes-inode.c (stapiu_get_task_inode): Ditto. * tapset/linux/task.stp (task_exe_file): Ditto. --- runtime/linux/task_finder.c | 13 ++----- runtime/linux/task_finder2.c | 13 ++----- runtime/linux/uprobes-inode.c | 12 +++---- runtime/task_finder_vma.c | 65 +++++++++++++++++++++++++++++------ tapset/linux/task.stp | 23 +------------ 5 files changed, 66 insertions(+), 60 deletions(-) diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c index 3ee2c4a9d..ad6901819 100644 --- a/runtime/linux/task_finder.c +++ b/runtime/linux/task_finder.c @@ -437,18 +437,9 @@ __stp_task_finder_cleanup(void) static char * __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) { - struct file *vm_file; + struct file *vm_file = stap_find_exe_file(mm); char *rc = NULL; - // The down_read() function can sleep, so we'll call - // down_read_trylock() instead, which can fail. If if fails, - // we'll just pretend this task didn't have a path. - if (!mm || ! 
down_read_trylock(&mm->mmap_sem)) { - *buf = '\0'; - return ERR_PTR(-ENOENT); - } - - vm_file = stap_find_exe_file(mm); if (vm_file) { #ifdef STAPCONF_DPATH_PATH rc = d_path(&(vm_file->f_path), buf, buflen); @@ -456,12 +447,12 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) rc = d_path(vm_file->f_dentry, vm_file->f_vfsmnt, buf, buflen); #endif + fput(vm_file); } else { *buf = '\0'; rc = ERR_PTR(-ENOENT); } - up_read(&mm->mmap_sem); return rc; } diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c index 0dc9ee68e..5ea704bf9 100644 --- a/runtime/linux/task_finder2.c +++ b/runtime/linux/task_finder2.c @@ -421,18 +421,9 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops) static char * __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) { - struct file *vm_file; + struct file *vm_file = stap_find_exe_file(mm); char *rc = NULL; - // The down_read() function can sleep, so we'll call - // down_read_trylock() instead, which can fail. If if fails, - // we'll just pretend this task didn't have a path. - if (!mm || ! down_read_trylock(&mm->mmap_sem)) { - *buf = '\0'; - return ERR_PTR(-ENOENT); - } - - vm_file = stap_find_exe_file(mm); if (vm_file) { #ifdef STAPCONF_DPATH_PATH rc = d_path(&(vm_file->f_path), buf, buflen); @@ -440,12 +431,12 @@ __stp_get_mm_path(struct mm_struct *mm, char *buf, int buflen) rc = d_path(vm_file->f_dentry, vm_file->f_vfsmnt, buf, buflen); #endif + fput(vm_file); } else { *buf = '\0'; rc = ERR_PTR(-ENOENT); } - up_read(&mm->mmap_sem); return rc; } diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c index 06d940b20..1393b9aeb 100644 --- a/runtime/linux/uprobes-inode.c +++ b/runtime/linux/uprobes-inode.c @@ -1,6 +1,6 @@ /* -*- linux-c -*- * Common functions for using inode-based uprobes - * Copyright (C) 2011-2013 Red Hat Inc. + * Copyright (C) 2011-2013,2015 Red Hat Inc. * * This file is part of systemtap, and is free software. 
You can * redistribute it and/or modify it under the terms of the GNU General @@ -698,12 +698,12 @@ stapiu_get_task_inode(struct task_struct *task) return NULL; } - down_read(&mm->mmap_sem); vm_file = stap_find_exe_file(mm); - if (vm_file && vm_file->f_path.dentry) - inode = vm_file->f_path.dentry->d_inode; - - up_read(&mm->mmap_sem); + if (vm_file) { + if (vm_file->f_path.dentry) + inode = vm_file->f_path.dentry->d_inode; + fput(vm_file); + } return inode; } diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c index a09093d21..f0a4db957 100644 --- a/runtime/task_finder_vma.c +++ b/runtime/task_finder_vma.c @@ -1,6 +1,7 @@ #ifndef TASK_FINDER_VMA_C #define TASK_FINDER_VMA_C +#include #include #include @@ -376,21 +377,65 @@ stap_drop_vma_maps(struct task_struct *tsk) return 0; } -/* Find the main executable for this mm. - * NB: mmap_sem should be held already. */ +/* + * stap_find_exe_file - acquire a reference to the mm's executable file + * + * Returns NULL if mm has no associated executable file. User must + * release file via fput(). + */ static struct file* stap_find_exe_file(struct mm_struct* mm) { - /* VM_EXECUTABLE was killed in kernel commit e9714acf, but in kernels - * that new we can just use mm->exe_file anyway. (PR14712) */ + // The following kernel commit changed the way the exported + // get_mm_exe_file() works. This commit first appears in the + // 4.1 kernel: + // + // commit 90f31d0ea88880f780574f3d0bb1a227c4c66ca3 + // Author: Konstantin Khlebnikov + // Date: Thu Apr 16 12:47:56 2015 -0700 + // + // mm: rcu-protected get_mm_exe_file() + // + // This patch removes mm->mmap_sem from mm->exe_file read side. + // Also it kills dup_mm_exe_file() and moves exe_file + // duplication into dup_mmap() where both mmap_sems are + // locked. + // + // So, for kernels >= 4.1, we'll use get_mm_exe_file(). For + // kernels < 4.1 but with get_mm_exe_file() exported, we'll + // still use our own code. 
The original get_mm_exe_file() can + // sleep (since it calls down_read()), so we'll have to roll + // our own. +#if defined(STAPCONF_DPATH_PATH) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)) + return get_mm_exe_file(mm); +#else + struct file *exe_file = NULL; + + // The down_read() function can sleep, so we'll call + // down_read_trylock() instead, which can fail. If it + // fails, we'll just pretend this task didn't have an + // exe file. + if (mm && down_read_trylock(&mm->mmap_sem)) { + + // VM_EXECUTABLE was killed in kernel commit e9714acf, + // but in kernels that new we can just use + // mm->exe_file anyway. (PR14712) #ifdef VM_EXECUTABLE - struct vm_area_struct *vma; - for (vma = mm->mmap; vma; vma = vma->vm_next) - if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) - return vma->vm_file; - return NULL; + struct vm_area_struct *vma; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { + exe_file = vma->vm_file; + break; + } + } #else - return mm->exe_file; + exe_file = mm->exe_file; +#endif + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + } + return exe_file; #endif } diff --git a/tapset/linux/task.stp b/tapset/linux/task.stp index 2534f5c0b..a526c9146 100644 --- a/tapset/linux/task.stp +++ b/tapset/linux/task.stp @@ -799,28 +799,7 @@ function task_exe_file:long(task:long) mm = get_task_mm(task); put_task_struct(task); if (mm) { - // Before using the mm pointer, let's be paranoid and - // make sure it is valid to read. - (void)kderef_buffer(NULL, mm, sizeof(struct mm_struct)); - -#ifdef STAPCONF_GET_MM_EXE_FILE - exe_file = get_mm_exe_file(mm); -#else - // When get_mm_exe_file() isn't exported, we'll have - // to handle this ourselves. - - // The down_read() function can sleep, so we'll call - // down_read_trylock() instead, which can fail. If if - // fails, we'll just pretend this task didn't have a - // cwd path. 
- if (down_read_trylock(&mm->mmap_sem)) { - exe_file = stap_find_exe_file(mm); - if (exe_file) { - get_file(exe_file); - } - up_read(&mm->mmap_sem); - } -#endif + exe_file = stap_find_exe_file(mm); mmput(mm); } -- 2.43.5