From 7615cae790c899bc8a82841c75c8ea9c6fa54df3 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Mon, 9 Nov 2020 19:18:19 -0500 Subject: [PATCH] PR26665: relayfs-on-procfs megapatch On platforms/configurations where debugfs is inaccessible (I'm side-eyeing you, secureboot + kernel_lockdown), the stap runtime needs another way to hook up the relayfs / .cmd files to talk to staprun/stapio in userspace. Kernel relayfs users all rely on debugfs (tied closely to struct dentry*), and filesystems where dentry*'s are not immediately available are SOL. Until now. This gigapatch forks pieces of runtime/transport/transport.c into debugfs and procfs alternatives. The debugfs fork is just like before. The procfs fork is new, and uses a proc_dir_entry <-> struct path look-up table to map between procfs objects and the dentry*'s that relayfs so loves. The debugfs alternative is the default, except when lockdown mode is detected; then the runtime chooses procfs_p at the strategic moment. Passing -DSTAP_TRANS_PROCFS or -DSTAP_TRANS_DEBUGFS to stap lets the user override this heuristic. (Going to a procfs default is worth considering at some point.) The staprun/stapio userspace is updated to search both /sys/kernel/debug/systemtap and /proc/systemtap for the relay/.cmd file endpoints. Most of this gigapatch is moving code around in runtime/transport/ so relay_v2 is agnostic to its enclosing filesystem, going through hooks in transport.c to either procfs.c or debugfs.c. The old runtime/procfs.c file is stripped down, with its /proc/systemtap/{module_name} directory management moved into runtime/transport/procfs.c. Signed-off-by: Frank Ch. 
Eigler --- buildrun.cxx | 3 +- runtime/linux/autoconf-lockdown-debugfs.c | 5 + runtime/procfs.c | 115 +------ runtime/transport/control.c | 2 +- runtime/transport/debugfs.c | 234 ++++++++++++- runtime/transport/procfs.c | 399 ++++++++++++++++------ runtime/transport/relay_v2.c | 59 ++-- runtime/transport/transport.c | 269 ++++++--------- runtime/transport/transport.h | 39 ++- staprun/ctl.c | 49 ++- staprun/relay.c | 7 + testsuite/buildok/trans-debugfs.stp | 3 + testsuite/buildok/trans-procfs.stp | 3 + 13 files changed, 738 insertions(+), 449 deletions(-) create mode 100644 runtime/linux/autoconf-lockdown-debugfs.c create mode 100755 testsuite/buildok/trans-debugfs.stp create mode 100755 testsuite/buildok/trans-procfs.stp diff --git a/buildrun.cxx b/buildrun.cxx index cc39e3173..9b4066d3a 100644 --- a/buildrun.cxx +++ b/buildrun.cxx @@ -516,7 +516,8 @@ compile_pass (systemtap_session& s) "STAPCONF_MMAP_LOCK", NULL); output_autoconf(s, o, cs, "autoconf-atomic_fetch_add_unless.c", "STAPCONF_ATOMIC_FETCH_ADD_UNLESS", NULL); - + output_autoconf(s, o, cs, "autoconf-lockdown-debugfs.c", "STAPCONF_LOCKDOWN_DEBUGFS", NULL); + // used by runtime/linux/netfilter.c output_exportconf(s, o2, "nf_register_hook", "STAPCONF_NF_REGISTER_HOOK"); diff --git a/runtime/linux/autoconf-lockdown-debugfs.c b/runtime/linux/autoconf-lockdown-debugfs.c new file mode 100644 index 000000000..33fe50a54 --- /dev/null +++ b/runtime/linux/autoconf-lockdown-debugfs.c @@ -0,0 +1,5 @@ +#include + +int foo(void) { + return security_locked_down(LOCKDOWN_DEBUGFS); +} diff --git a/runtime/procfs.c b/runtime/procfs.c index 8fa889d5f..121b12fe3 100644 --- a/runtime/procfs.c +++ b/runtime/procfs.c @@ -50,123 +50,10 @@ static int _stp_num_pde = 0; static struct proc_dir_entry *_stp_pde[STP_MAX_PROCFS_FILES]; -/* _stp_proc_root is the '/proc/systemtap/{module_name}' directory. 
*/ -static struct proc_dir_entry *_stp_proc_root = NULL; - static void _stp_close_procfs(void); -/* - * Removes '/proc/systemtap/{module_name}'. Notice we're leaving - * '/proc/systemtap' behind. There is no way on newer kernels to know - * if a procfs directory is empty. - * - * NB: this is suitable to call late in the module cleanup function, - * and does not rely on any other facilities in the runtime. PR19833. - * See also PR15408. - */ -static void _stp_rmdir_proc_module(void) -{ - if (_stp_proc_root) { - proc_remove(_stp_proc_root); - _stp_proc_root = NULL; - } -} - - -/* - * Safely creates '/proc/systemtap' (if necessary) and - * '/proc/systemtap/{module_name}'. - * - * NB: this function is suitable to call from early in the the - * module-init function, and doesn't rely on any other facilities - * in our runtime. PR19833. See also PR15408. - */ -static int _stp_mkdir_proc_module(void) -{ - int found = 0; - static char proc_root_name[STP_MODULE_NAME_LEN + sizeof("systemtap/")]; -#if defined(STAPCONF_PATH_LOOKUP) || defined(STAPCONF_KERN_PATH_PARENT) - struct nameidata nd; -#else /* STAPCONF_VFS_PATH_LOOKUP or STAPCONF_KERN_PATH */ - struct path path; -#if defined(STAPCONF_VFS_PATH_LOOKUP) - struct vfsmount *mnt; -#endif - int rc; -#endif /* STAPCONF_VFS_PATH_LOOKUP or STAPCONF_KERN_PATH */ - - if (_stp_proc_root != NULL) - return 0; - -#if defined(STAPCONF_PATH_LOOKUP) || defined(STAPCONF_KERN_PATH_PARENT) - /* Why "/proc/systemtap/foo"? kern_path_parent() is basically - * the same thing as calling the old path_lookup() with flags - * set to LOOKUP_PARENT, which means to look up the parent of - * the path, which in this case is "/proc/systemtap". */ - if (! 
kern_path_parent("/proc/systemtap/foo", &nd)) { - found = 1; -#ifdef STAPCONF_NAMEIDATA_CLEANUP - path_put(&nd.path); -#else /* !STAPCONF_NAMEIDATA_CLEANUP */ - path_release(&nd); -#endif /* !STAPCONF_NAMEIDATA_CLEANUP */ - } - -#elif defined(STAPCONF_KERN_PATH) - /* Prefer kern_path() over vfs_path_lookup(), since on some - * kernels the declaration for vfs_path_lookup() was moved to - * a private header. */ - - /* See if '/proc/systemtap' exists. */ - rc = kern_path("/proc/systemtap", 0, &path); - if (rc == 0) { - found = 1; - path_put (&path); - } - -#else /* STAPCONF_VFS_PATH_LOOKUP */ - /* See if '/proc/systemtap' exists. */ - if (! init_pid_ns.proc_mnt) { - errk("Unable to create '/proc/systemap':" - " '/proc' doesn't exist.\n"); - goto done; - } - mnt = init_pid_ns.proc_mnt; - rc = vfs_path_lookup(mnt->mnt_root, mnt, "systemtap", 0, &path); - if (rc == 0) { - found = 1; - path_put (&path); - } -#endif /* STAPCONF_VFS_PATH_LOOKUP */ - - /* If we couldn't find "/proc/systemtap", create it. */ - if (!found) { - struct proc_dir_entry *de; - de = proc_mkdir ("systemtap", NULL); - if (de == NULL) { - errk("Unable to create '/proc/systemap':" - " proc_mkdir failed.\n"); - goto done; - } - } - - /* Create the "systemtap/{module_name} directory in procfs. */ - strlcpy(proc_root_name, "systemtap/", sizeof(proc_root_name)); - strlcat(proc_root_name, THIS_MODULE->name, sizeof(proc_root_name)); - _stp_proc_root = proc_mkdir(proc_root_name, NULL); -#ifdef STAPCONF_PROCFS_OWNER - if (_stp_proc_root != NULL) - _stp_proc_root->owner = THIS_MODULE; -#endif - if (_stp_proc_root == NULL) - errk("Unable to create '/proc/systemap/%s':" - " proc_mkdir failed.\n", THIS_MODULE->name); - -done: - return (_stp_proc_root) ? 
0 : -EINVAL; -} #ifdef _STP_ALLOW_PROCFS_PATH_SUBDIRS /* @@ -199,7 +86,7 @@ static int _stp_create_procfs(const char *path, if (_stp_num_pde >= STP_MAX_PROCFS_FILES) goto too_many; - last_dir = _stp_proc_root; + last_dir = _stp_procfs_module_dir; /* if no path, use default one */ if (strlen(path) == 0) diff --git a/runtime/transport/control.c b/runtime/transport/control.c index ff7338db3..9343b3c28 100644 --- a/runtime/transport/control.c +++ b/runtime/transport/control.c @@ -737,7 +737,7 @@ static int _stp_register_ctl_channel(void) if (unlikely(_stp_ctl_alloc_special_buffers() != 0)) goto err0; - if (_stp_register_ctl_channel_fs() != 0) + if (_stp_register_ctl_channel_fs() != 0) // procfs or debugfs decision time goto err0; return 0; diff --git a/runtime/transport/debugfs.c b/runtime/transport/debugfs.c index a5ed276ef..28a5bf89d 100644 --- a/runtime/transport/debugfs.c +++ b/runtime/transport/debugfs.c @@ -13,23 +13,23 @@ #include "transport.h" #include "../uidgid_compatibility.h" -/* Defines the number of buffers allocated in control.c (which #includes - this file) for the _stp_pool_q. This is the number of .cmd messages - the module can store before they have to be read by stapio. - 40 is somewhat arbitrary, 8 pre-allocated messages, 32 dynamic. */ -#define STP_DEFAULT_BUFFERS 40 + + /* Always returns zero, we just push all messages on the _stp_ctl_ready_q. 
*/ -inline static int _stp_ctl_write_fs(int type, void *data, unsigned len) +inline static int _stp_debugfs_ctl_write_fs(int type, void *data, unsigned len) { return 0; } -static struct dentry *_stp_cmd_file = NULL; +static struct dentry *__stp_debugfs_root_dir = NULL; // DEBUGFS/systemtap/ +static struct dentry *__stp_debugfs_module_dir = NULL; // DEBUGFS/systemtap/MODULE/ +static struct dentry *_stp_cmd_file = NULL; // DEBUGFS/systemtap/MODULE/.cmd + -static int _stp_register_ctl_channel_fs(void) +static int _stp_debugfs_register_ctl_channel_fs(void) { - struct dentry *module_dir = _stp_get_module_dir(); + struct dentry *module_dir = _stp_debugfs_get_module_dir(); if (module_dir == NULL) { errk("no module directory found.\n"); return -1; @@ -55,8 +55,222 @@ static int _stp_register_ctl_channel_fs(void) return 0; } -static void _stp_unregister_ctl_channel_fs(void) +static void _stp_debugfs_unregister_ctl_channel_fs(void) { if (_stp_cmd_file) debugfs_remove(_stp_cmd_file); } + + +static int _stp_debugfs_transport_fs_init(const char *module_name) +{ + struct dentry *root_dir; + + dbug_trans(1, "entry\n"); + if (module_name == NULL) + return -1; + + if (!_stp_lock_transport_dir()) { + errk("Couldn't lock transport directory.\n"); + return -1; + } + + root_dir = _stp_debugfs_get_root_dir(); + if (root_dir == NULL) { + _stp_unlock_transport_dir(); + return -1; + } + + __stp_debugfs_module_dir = debugfs_create_dir(module_name, root_dir); + if (!__stp_debugfs_module_dir) { + errk("Could not create module directory \"%s\"\n", + module_name); + _stp_debugfs_remove_root_dir(); + _stp_unlock_transport_dir(); + return -1; + } + else if (IS_ERR(__stp_debugfs_module_dir)) { + errk("Could not create module directory \"%s\", error %ld\n", + module_name, -PTR_ERR(__stp_debugfs_module_dir)); + _stp_debugfs_remove_root_dir(); + _stp_unlock_transport_dir(); + return -1; + } + + if (_stp_transport_data_fs_init() != 0) { + debugfs_remove(__stp_debugfs_module_dir); + 
__stp_debugfs_module_dir = NULL; + _stp_debugfs_remove_root_dir(); + _stp_unlock_transport_dir(); + return -1; + } + _stp_unlock_transport_dir(); + dbug_trans(1, "returning 0\n"); + return 0; +} + +static void _stp_debugfs_transport_fs_close(void) +{ + dbug_trans(1, "stp_transport_fs_close\n"); + _stp_transport_data_fs_close(); + if (__stp_debugfs_module_dir) { + if (!_stp_lock_transport_dir()) { + errk("Couldn't lock transport directory.\n"); + return; + } + + debugfs_remove(__stp_debugfs_module_dir); + __stp_debugfs_module_dir = NULL; + + _stp_debugfs_remove_root_dir(); + _stp_unlock_transport_dir(); + } +} + + +static struct dentry *_stp_lockfile = NULL; + +static int _stp_lock_transport_dir(void) +{ + int numtries = 0; + + while ((_stp_lockfile = debugfs_create_dir("systemtap_lock", NULL)) == NULL) { + if (numtries++ >= 50) + return 0; + msleep(50); + } + return 1; +} + +static void _stp_unlock_transport_dir(void) +{ + if (_stp_lockfile) { + debugfs_remove(_stp_lockfile); + _stp_lockfile = NULL; + } +} + +/* _stp_debugfs_get_root_dir() - creates root directory or returns + * a pointer to it if it already exists. + * + * The caller *must* lock the transport directory. + */ + +static struct dentry *_stp_debugfs_get_root_dir(void) +{ + struct file_system_type *fs; + struct super_block *sb; + const char *name = "systemtap"; + + if (__stp_debugfs_root_dir != NULL) { + return __stp_debugfs_root_dir; + } + + fs = get_fs_type("debugfs"); + if (!fs) { + errk("Couldn't find debugfs filesystem.\n"); + return NULL; + } + + __stp_debugfs_root_dir = debugfs_create_dir(name, NULL); + if (__stp_debugfs_root_dir == ERR_PTR(-EEXIST)) /* some kernels signal duplication this way */ + __stp_debugfs_root_dir = NULL; + if (!__stp_debugfs_root_dir) { + /* Couldn't create it because it is already there, so + * find it. 
*/ +#ifdef STAPCONF_FS_SUPERS_HLIST + sb = hlist_entry(fs->fs_supers.first, struct super_block, + s_instances); +#else + sb = list_entry(fs->fs_supers.next, struct super_block, + s_instances); +#endif + _stp_lock_inode(sb->s_root->d_inode); + __stp_debugfs_root_dir = lookup_one_len(name, sb->s_root, + strlen(name)); + _stp_unlock_inode(sb->s_root->d_inode); + if (!IS_ERR(__stp_debugfs_root_dir)) + dput(__stp_debugfs_root_dir); + else { + __stp_debugfs_root_dir = NULL; + errk("Could not create or find transport directory.\n"); + } + } + else if (IS_ERR(__stp_debugfs_root_dir)) { + __stp_debugfs_root_dir = NULL; + errk("Could not create root directory \"%s\", error %ld\n", name, + -PTR_ERR(__stp_debugfs_root_dir)); + } + + return __stp_debugfs_root_dir; +} + +/* _stp_debugfs_remove_root_dir() - removes root directory (if empty) + * + * The caller *must* lock the transport directory. + */ + +static void _stp_debugfs_remove_root_dir(void) +{ + if (__stp_debugfs_root_dir) { + if (simple_empty(__stp_debugfs_root_dir)) { + debugfs_remove(__stp_debugfs_root_dir); + } + __stp_debugfs_root_dir = NULL; + } +} + +// this is used by relay_v2 to place the traceN relayfs files. 
+static struct dentry *_stp_debugfs_get_module_dir(void) +{ + return __stp_debugfs_module_dir; +} + + +// relay_v2 callbacks for creating per-cpu files + +static int __stp_debugfs_relay_remove_buf_file_callback(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + + +static struct dentry * +__stp_debugfs_relay_create_buf_file_callback(const char *filename, + struct dentry *parent, +#ifdef STAPCONF_RELAY_UMODE_T + umode_t mode, +#else + int mode, +#endif + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *file = debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations_w_owner); + /* + * Here's what 'is_global' does (from linux/relay.h): + * + * Setting the is_global outparam to a non-zero value will + * cause relay_open() to create a single global buffer rather + * than the default set of per-cpu buffers. + */ + if (is_global) { +#ifdef STP_BULKMODE + *is_global = 0; +#else + *is_global = 1; +#endif + } + + if (IS_ERR(file)) { + file = NULL; + } + else if (file) { + file->d_inode->i_uid = KUIDT_INIT(_stp_uid); + file->d_inode->i_gid = KGIDT_INIT(_stp_gid); + } + return file; +} + diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c index 98cb20586..520e17cd5 100644 --- a/runtime/transport/procfs.c +++ b/runtime/transport/procfs.c @@ -9,64 +9,138 @@ * later version. */ -#include "../procfs.c" // for _stp_mkdir_proc_module() #include "relay_compat.h" -#define STP_DEFAULT_BUFFERS 256 -#ifdef STP_BULKMODE -/* handle the per-cpu subbuf info read for relayfs */ -static ssize_t _stp_proc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) -{ - int num; - struct _stp_buf_info out; - struct rchan_buf *sub_buf; +/* _stp_procfs_module_dir is the '/proc/systemtap/{module_name}' directory. */ +static struct proc_dir_entry *_stp_procfs_module_dir = NULL; +static struct path _stp_procfs_module_dir_path; + +/* + * Safely creates '/proc/systemtap' (if necessary) and + * '/proc/systemtap/{module_name}'. 
+ * + * NB: this function is suitable to call from early in the the + * module-init function, and doesn't rely on any other facilities + * in our runtime. PR19833. See also PR15408. + */ +static int _stp_mkdir_proc_module(void) +{ + int found = 0; + static char proc_root_name[STP_MODULE_NAME_LEN + sizeof("systemtap/")]; +#if defined(STAPCONF_PATH_LOOKUP) || defined(STAPCONF_KERN_PATH_PARENT) + struct nameidata nd; +#else /* STAPCONF_VFS_PATH_LOOKUP or STAPCONF_KERN_PATH */ + struct path path; +#if defined(STAPCONF_VFS_PATH_LOOKUP) + struct vfsmount *mnt; +#endif + int rc; +#endif /* STAPCONF_VFS_PATH_LOOKUP or STAPCONF_KERN_PATH */ - int cpu = *(int *)(PDE(file->f_dentry->d_inode)->data); + if (_stp_procfs_module_dir != NULL) + return 0; - if (!_stp_relay_data.rchan) - return -EINVAL; +#if defined(STAPCONF_PATH_LOOKUP) || defined(STAPCONF_KERN_PATH_PARENT) + /* Why "/proc/systemtap/foo"? kern_path_parent() is basically + * the same thing as calling the old path_lookup() with flags + * set to LOOKUP_PARENT, which means to look up the parent of + * the path, which in this case is "/proc/systemtap". */ + if (! kern_path_parent("/proc/systemtap/foo", &nd)) { + found = 1; +#ifdef STAPCONF_NAMEIDATA_CLEANUP + path_put(&nd.path); +#else /* !STAPCONF_NAMEIDATA_CLEANUP */ + path_release(&nd); +#endif /* !STAPCONF_NAMEIDATA_CLEANUP */ + } + +#elif defined(STAPCONF_KERN_PATH) + /* Prefer kern_path() over vfs_path_lookup(), since on some + * kernels the declaration for vfs_path_lookup() was moved to + * a private header. */ + + /* See if '/proc/systemtap' exists. 
*/ + rc = kern_path("/proc/systemtap", 0, &path); + if (rc == 0) { + found = 1; + path_put (&path); + } - out.cpu = cpu; - sub_buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, cpu); - out.produced = atomic_read(&sub_buf->subbufs_produced); - out.consumed = atomic_read(&sub_buf->subbufs_consumed); - out.flushing = _stp_relay_data.flushing; +#else /* STAPCONF_VFS_PATH_LOOKUP */ + /* See if '/proc/systemtap' exists. */ + if (! init_pid_ns.proc_mnt) { + errk("Unable to create '/proc/systemap':" + " '/proc' doesn't exist.\n"); + goto done; + } + mnt = init_pid_ns.proc_mnt; + rc = vfs_path_lookup(mnt->mnt_root, mnt, "systemtap", 0, &path); + if (rc == 0) { + found = 1; + path_put (&path); + } +#endif /* STAPCONF_VFS_PATH_LOOKUP */ - num = sizeof(out); - if (copy_to_user(buf, &out, num)) - return -EFAULT; + /* If we couldn't find "/proc/systemtap", create it. */ + if (!found) { + struct proc_dir_entry *de; - return num; + de = proc_mkdir ("systemtap", NULL); + if (de == NULL) { + errk("Unable to create '/proc/systemap':" + " proc_mkdir failed.\n"); + goto done; + } + } + + /* Create the "systemtap/{module_name} directory in procfs. */ + strlcpy(proc_root_name, "/proc/systemtap/", sizeof(proc_root_name)); + strlcat(proc_root_name, THIS_MODULE->name, sizeof(proc_root_name)); + _stp_procfs_module_dir = proc_mkdir(&proc_root_name[6], NULL); // skip the /proc/ +#ifdef STAPCONF_PROCFS_OWNER + if (_stp_procfs_module_dir != NULL) + _stp_procfs_module_dir->owner = THIS_MODULE; +#endif + if (_stp_procfs_module_dir == NULL) + errk("Unable to create '/proc/systemap/%s':" + " proc_mkdir failed.\n", THIS_MODULE->name); + else { + rc = kern_path(proc_root_name, 0, &_stp_procfs_module_dir_path); + if (rc != 0) { + errk("Unable to resolve /proc/systemap/%s':" + " to path.\n", THIS_MODULE->name); + proc_remove(_stp_procfs_module_dir); + _stp_procfs_module_dir = NULL; + return rc; + } + } + +done: + return (_stp_procfs_module_dir) ? 
0 : -EINVAL; } -/* handle the per-cpu subbuf info write for relayfs */ -static ssize_t _stp_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) -{ - struct _stp_consumed_info info; - int cpu = *(int *)(PDE(file->f_dentry->d_inode)->data); - if (copy_from_user(&info, buf, count)) - return -EFAULT; - relay_subbufs_consumed(_stp_relay_data.rchan, cpu, info.consumed); - return count; +/* + * Removes '/proc/systemtap/{module_name}'. Notice we're leaving + * '/proc/systemtap' behind. There is no way on newer kernels to know + * if a procfs directory is empty. + * + * NB: this is suitable to call late in the module cleanup function, + * and does not rely on any other facilities in the runtime. PR19833. + * See also PR15408. + */ +static void _stp_rmdir_proc_module(void) +{ + if (_stp_procfs_module_dir) { + path_put(& _stp_procfs_module_dir_path); + proc_remove(_stp_procfs_module_dir); + _stp_procfs_module_dir = NULL; + } } -#ifdef STAPCONF_PROC_OPS -static struct proc_ops _stp_proc_fops = { - .proc_read = _stp_proc_read, - .proc_write = _stp_proc_write, -}; -#else -static struct file_operations _stp_proc_fops = { - .owner = THIS_MODULE, - .read = _stp_proc_read, - .write = _stp_proc_write, -}; -#endif -#endif /* STP_BULKMODE */ -inline static int _stp_ctl_write_fs(int type, void *data, unsigned len) +inline static int _stp_procfs_ctl_write_fs(int type, void *data, unsigned len) { struct _stp_buffer *bptr; unsigned long flags; @@ -90,7 +164,7 @@ inline static int _stp_ctl_write_fs(int type, void *data, unsigned len) return 0; } -static int _stp_ctl_read_bufsize(char *page, char **start, off_t off, int count, int *eof, void *data) +static int _stp_proc_ctl_read_bufsize(char *page, char **start, off_t off, int count, int *eof, void *data) { int len = sprintf(page, "%d,%d\n", _stp_nsubbufs, _stp_subbuf_size); if (len <= off + count) @@ -104,94 +178,199 @@ static int _stp_ctl_read_bufsize(char *page, char **start, off_t off, int count, return len; 
} -static int _stp_register_ctl_channel_fs(void) + +static struct file_operations _stp_ctl_fops_cmd; +#ifdef STAPCONF_PROC_OPS /* control.c */ +static struct proc_ops _stp_ctl_proc_ops_cmd; +#endif + + +static int _stp_procfs_register_ctl_channel_fs(void) { -#ifdef STP_BULKMODE - int i; - int j; - char buf[32]; struct proc_dir_entry *bs = NULL; -#endif struct proc_dir_entry *de; if (_stp_mkdir_proc_module()) goto err0; -#ifdef STP_BULKMODE - /* now for each cpu "n", create /proc/systemtap/module_name/n */ - for_each_possible_cpu(i) { - snprintf(buf, sizeof(buf), "%d", i); - de = create_proc_entry(buf, 0600, _stp_proc_root); - if (de == NULL) - goto err1; - de->uid = _stp_uid; - de->gid = _stp_gid; + /* create /proc/systemtap/module_name/.cmd */ #ifdef STAPCONF_PROC_OPS - de->proc_ops = &_stp_proc_fops; + de = proc_create(".cmd", 0600, _stp_procfs_module_dir, &_stp_ctl_proc_ops_cmd); #else - de->proc_fops = &_stp_proc_fops; + de = proc_create(".cmd", 0600, _stp_procfs_module_dir, &_stp_ctl_fops_cmd); #endif - de->data = _stp_kmalloc(sizeof(int)); - if (de->data == NULL) { - remove_proc_entry(buf, _stp_proc_root); - goto err1; - } - *(int *)de->data = i; - } - bs = create_proc_read_entry("bufsize", 0, _stp_proc_root, _stp_ctl_read_bufsize, NULL); -#endif /* STP_BULKMODE */ - - /* create /proc/systemtap/module_name/.cmd */ - de = create_proc_entry(".cmd", 0600, _stp_proc_root); if (de == NULL) goto err1; - de->uid = _stp_uid; - de->gid = _stp_gid; -#ifdef STAPCONF_PROC_OPS - de->proc_ops = &_stp_ctl_proc_ops_cmd; -#else - de->proc_fops = &_stp_ctl_fops_cmd; -#endif + proc_set_user(de, KUIDT_INIT(_stp_uid), KGIDT_INIT(_stp_gid)); return 0; err1: -#ifdef STP_BULKMODE - for (de = _stp_proc_root->subdir; de; de = de->next) - _stp_kfree(de->data); - for_each_possible_cpu(j) { - if (j == i) - break; - snprintf(buf, sizeof(buf), "%d", j); - remove_proc_entry(buf, _stp_proc_root); - - } - if (bs) - remove_proc_entry("bufsize", _stp_proc_root); -#endif /* STP_BULKMODE */ 
_stp_rmdir_proc_module(); err0: return -1; } -static void _stp_unregister_ctl_channel_fs(void) +static void _stp_procfs_unregister_ctl_channel_fs(void) { -#ifdef STP_BULKMODE - char buf[32]; - int i; - struct proc_dir_entry *de; + remove_proc_entry(".cmd", _stp_procfs_module_dir); + _stp_rmdir_proc_module(); +} - dbug_trans(1, "unregistering procfs\n"); - for (de = _stp_proc_root->subdir; de; de = de->next) - _stp_kfree(de->data); - for_each_possible_cpu(i) { - snprintf(buf, sizeof(buf), "%d", i); - remove_proc_entry(buf, _stp_proc_root); - } - remove_proc_entry("bufsize", _stp_proc_root); -#endif /* STP_BULKMODE */ - remove_proc_entry(".cmd", _stp_proc_root); - _stp_rmdir_proc_module(); +#ifdef STAPCONF_PROC_OPS +struct proc_ops relay_procfs_operations; +#else +struct file_operations relay_procfs_operations; +#endif + + +static int _stp_procfs_transport_fs_init(const char *module_name) +{ +#ifdef STAPCONF_PROC_OPS + relay_procfs_operations.proc_open = relay_file_operations.open; + relay_procfs_operations.proc_poll = relay_file_operations.poll; + relay_procfs_operations.proc_mmap = relay_file_operations.mmap; + relay_procfs_operations.proc_read = relay_file_operations.read; + relay_procfs_operations.proc_lseek = relay_file_operations.llseek; + relay_procfs_operations.proc_release = relay_file_operations.release; +#else + relay_procfs_operations = relay_file_operations; + relay_procfs_operations.owner = THIS_MODULE; +#endif + + if (_stp_mkdir_proc_module()) // get the _stp_procfs_module_dir* created + return -1; + + dbug_trans(1, "transport_fs_init dentry=%08lx pde=%08lx ", + (unsigned long) _stp_procfs_module_dir_path.dentry, + (unsigned long) _stp_procfs_module_dir); + + if (_stp_transport_data_fs_init() != 0) + return -1; + + return 0; +} + + +static void _stp_procfs_transport_fs_close(void) +{ + _stp_transport_data_fs_close(); +} + + + +// We need to map procfs concepts of proc_dir_entry* and relayfs/vfs of path/dentry*. 
+#define MAX_RELAYFS_FILES NR_CPUS +struct procfs_relay_file +{ + struct path p; // contains the dentry* + struct proc_dir_entry *pde; // entry valid if this pointer non-NULL +}; +struct procfs_relay_file p_r_files[MAX_RELAYFS_FILES]; + + + +static struct dentry *_stp_procfs_get_module_dir(void) +{ + return _stp_procfs_module_dir_path.dentry; +} + + +static int __stp_procfs_relay_remove_buf_file_callback(struct dentry *dentry) +{ + unsigned i; + struct proc_dir_entry *pde = NULL; + + // find the corresponding pde* + for (i=0; iname, filename); + + // find spot to plop this + for (i=0; id_inode; + in->i_private = buf; + + // success! + goto out; + +out1: + proc_remove (pde); + +out: + dbug_trans(1, "create-buf name=%s parent=%08lx -> i=%u rc=%d de=%08lx", + filename, (unsigned long) parent, + i, rc, (unsigned long) de); + return de; } diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c index 1a0a080e4..ff621f71d 100644 --- a/runtime/transport/relay_v2.c +++ b/runtime/transport/relay_v2.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "../linux/timer_compatibility.h" #include "../uidgid_compatibility.h" #include "relay_compat.h" @@ -163,15 +162,37 @@ static void _stp_transport_data_fs_overwrite(int overwrite) _stp_relay_data.overwrite_flag = overwrite; } - return 0; + +/* + * Keep track of how many times we encountered a full subbuffer, to aid + * the user space app in telling how many lost events there were. 
+ */ +static int __stp_relay_subbuf_start_callback(struct rchan_buf *buf, + void *subbuf, void *prev_subbuf, + size_t prev_padding) +{ + if (_stp_relay_data.overwrite_flag || !relay_buf_full(buf)) + return 1; + +#ifdef _STP_USE_DROPPED_FILE + atomic_inc(&_stp_relay_data.dropped); +#endif + return 0; } + +// PR26665: demultiplex debugfs vs procfs host + static int __stp_relay_remove_buf_file_callback(struct dentry *dentry) { - debugfs_remove(dentry); + if (debugfs_p) + return __stp_debugfs_relay_remove_buf_file_callback(dentry); + if (procfs_p) + return __stp_procfs_relay_remove_buf_file_callback(dentry); return 0; } + static struct dentry * __stp_relay_create_buf_file_callback(const char *filename, struct dentry *parent, @@ -183,39 +204,21 @@ __stp_relay_create_buf_file_callback(const char *filename, struct rchan_buf *buf, int *is_global) { - struct dentry *file = debugfs_create_file(filename, mode, parent, buf, - &relay_file_operations_w_owner); - /* - * Here's what 'is_global' does (from linux/relay.h): - * - * Setting the is_global outparam to a non-zero value will - * cause relay_open() to create a single global buffer rather - * than the default set of per-cpu buffers. 
- */ - if (is_global) { -#ifdef STP_BULKMODE - *is_global = 0; -#else - *is_global = 1; -#endif - } - - if (IS_ERR(file)) { - file = NULL; - } - else if (file) { - file->d_inode->i_uid = KUIDT_INIT(_stp_uid); - file->d_inode->i_gid = KGIDT_INIT(_stp_gid); - } - return file; + if (debugfs_p) + return __stp_debugfs_relay_create_buf_file_callback(filename, parent, mode, buf, is_global); + if (procfs_p) + return __stp_procfs_relay_create_buf_file_callback(filename, parent, mode, buf, is_global); + return NULL; } + static struct rchan_callbacks __stp_relay_callbacks = { .subbuf_start = __stp_relay_subbuf_start_callback, .create_buf_file = __stp_relay_create_buf_file_callback, .remove_buf_file = __stp_relay_remove_buf_file_callback, }; + static void _stp_transport_data_fs_start(void) { if (atomic_read (&_stp_relay_data.transport_state) == STP_TRANSPORT_INITIALIZED) { diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c index 01b9b4171..bb4a98bd3 100644 --- a/runtime/transport/transport.c +++ b/runtime/transport/transport.c @@ -21,6 +21,9 @@ #include #include #include +#ifdef CONFIG_SECURITY_LOCKDOWN_LSM +#include +#endif #include "../uidgid_compatibility.h" static int _stp_exit_flag = 0; @@ -55,10 +58,16 @@ static inline void _stp_unlock_inode(struct inode *inode); #define STP_CTL_TIMER_INTERVAL ((HZ+49)/50) #endif +/* Defines the number of buffers allocated in control.c (which #includes + this file) for the _stp_pool_q. This is the number of .cmd messages + the module can store before they have to be read by stapio. + 40 is somewhat arbitrary, 8 pre-allocated messages, 32 dynamic. 
*/ +#define STP_DEFAULT_BUFFERS 256 #include "control.h" #include "relay_v2.c" #include "debugfs.c" +#include "procfs.c" #include "control.c" static unsigned _stp_nsubbufs = 8; @@ -97,6 +106,99 @@ static struct notifier_block _stp_module_panic_notifier_nb = { static struct timer_list _stp_ctl_work_timer; + + +// ------------------------------------------------------------------------ + +// Dispatching functions to choose between procfs and debugfs variants +// of the transport. PR26665 + +static int _stp_transport_fs_init(const char *module_name) +{ + // BTW: testing the other !FOO_p first is to protect against repeated + // invocations of this function with security_locked_down() changing +#ifdef STAPCONF_LOCKDOWN_DEBUGFS + if (!debugfs_p && security_locked_down (LOCKDOWN_DEBUGFS)) { + procfs_p = 1; + dbug_trans(1, "choosing procfs_p=1\n"); + } +#endif + if (!procfs_p) { + debugfs_p = 1; + dbug_trans(1, "choosing debugfs_p=1\n"); + } + +#ifdef STAP_TRANS_PROCFS + procfs_p = 1; + debugfs_p = 0; + dbug_trans(1, "forcing procfs_p=1\n"); +#endif +#ifdef STAP_TRANS_DEBUGFS + procfs_p = 0; + debugfs_p = 1; + dbug_trans(1, "forcing debugfs_p=1\n"); +#endif + + if (debugfs_p) + return _stp_debugfs_transport_fs_init(module_name); + if (procfs_p) + return _stp_procfs_transport_fs_init(module_name); + return -ENOSYS; +} + +static void _stp_transport_fs_close(void) +{ + if (debugfs_p) + _stp_debugfs_transport_fs_close(); + if (procfs_p) + _stp_procfs_transport_fs_close(); +} + + +static int _stp_ctl_write_fs(int type, void *data, unsigned len) +{ + if (procfs_p) + return _stp_procfs_ctl_write_fs (type, data, len); + if (debugfs_p) + return _stp_debugfs_ctl_write_fs (type, data, len); + return -ENOSYS; +} + + +static int _stp_register_ctl_channel_fs(void) +{ + if (debugfs_p) + return _stp_debugfs_register_ctl_channel_fs(); + if (procfs_p) + return _stp_procfs_register_ctl_channel_fs(); + + return -ENOSYS; +} + + +static void _stp_unregister_ctl_channel_fs(void) +{ + if 
(procfs_p) + _stp_procfs_unregister_ctl_channel_fs(); + if (debugfs_p) + _stp_debugfs_unregister_ctl_channel_fs(); +} + + + +static struct dentry *_stp_get_module_dir(void) +{ + if (procfs_p) + return _stp_procfs_get_module_dir(); + if (debugfs_p) + return _stp_debugfs_get_module_dir(); + return NULL; +} + + + +// ------------------------------------------------------------------------ + /* * _stp_handle_start - handle STP_START */ @@ -579,173 +681,6 @@ static inline void _stp_unlock_inode(struct inode *inode) #endif } -static struct dentry *_stp_lockfile = NULL; - -static int _stp_lock_transport_dir(void) -{ - int numtries = 0; - - while ((_stp_lockfile = debugfs_create_dir("systemtap_lock", NULL)) == NULL) { - if (numtries++ >= 50) - return 0; - msleep(50); - } - return 1; -} - -static void _stp_unlock_transport_dir(void) -{ - if (_stp_lockfile) { - debugfs_remove(_stp_lockfile); - _stp_lockfile = NULL; - } -} - -static struct dentry *__stp_root_dir = NULL; - -/* _stp_get_root_dir() - creates root directory or returns - * a pointer to it if it already exists. - * - * The caller *must* lock the transport directory. - */ - -static struct dentry *_stp_get_root_dir(void) -{ - struct file_system_type *fs; - struct super_block *sb; - const char *name = "systemtap"; - - if (__stp_root_dir != NULL) { - return __stp_root_dir; - } - - fs = get_fs_type("debugfs"); - if (!fs) { - errk("Couldn't find debugfs filesystem.\n"); - return NULL; - } - - __stp_root_dir = debugfs_create_dir(name, NULL); - if (__stp_root_dir == ERR_PTR(-EEXIST)) /* some kernels signal duplication this way */ - __stp_root_dir = NULL; - if (!__stp_root_dir) { - /* Couldn't create it because it is already there, so - * find it. 
*/ -#ifdef STAPCONF_FS_SUPERS_HLIST - sb = hlist_entry(fs->fs_supers.first, struct super_block, - s_instances); -#else - sb = list_entry(fs->fs_supers.next, struct super_block, - s_instances); -#endif - _stp_lock_inode(sb->s_root->d_inode); - __stp_root_dir = lookup_one_len(name, sb->s_root, - strlen(name)); - _stp_unlock_inode(sb->s_root->d_inode); - if (!IS_ERR(__stp_root_dir)) - dput(__stp_root_dir); - else { - __stp_root_dir = NULL; - errk("Could not create or find transport directory.\n"); - } - } - else if (IS_ERR(__stp_root_dir)) { - __stp_root_dir = NULL; - errk("Could not create root directory \"%s\", error %ld\n", name, - -PTR_ERR(__stp_root_dir)); - } - - return __stp_root_dir; -} - -/* _stp_remove_root_dir() - removes root directory (if empty) - * - * The caller *must* lock the transport directory. - */ - -static void _stp_remove_root_dir(void) -{ - if (__stp_root_dir) { - if (simple_empty(__stp_root_dir)) { - debugfs_remove(__stp_root_dir); - } - __stp_root_dir = NULL; - } -} - -static struct dentry *__stp_module_dir = NULL; - -static struct dentry *_stp_get_module_dir(void) -{ - return __stp_module_dir; -} - -static int _stp_transport_fs_init(const char *module_name) -{ - struct dentry *root_dir; - - dbug_trans(1, "entry\n"); - if (module_name == NULL) - return -1; - - if (!_stp_lock_transport_dir()) { - errk("Couldn't lock transport directory.\n"); - return -1; - } - - root_dir = _stp_get_root_dir(); - if (root_dir == NULL) { - _stp_unlock_transport_dir(); - return -1; - } - - __stp_module_dir = debugfs_create_dir(module_name, root_dir); - if (!__stp_module_dir) { - errk("Could not create module directory \"%s\"\n", - module_name); - _stp_remove_root_dir(); - _stp_unlock_transport_dir(); - return -1; - } - else if (IS_ERR(__stp_module_dir)) { - errk("Could not create module directory \"%s\", error %ld\n", - module_name, -PTR_ERR(__stp_module_dir)); - _stp_remove_root_dir(); - _stp_unlock_transport_dir(); - return -1; - } - - if 
(_stp_transport_data_fs_init() != 0) { - debugfs_remove(__stp_module_dir); - __stp_module_dir = NULL; - _stp_remove_root_dir(); - _stp_unlock_transport_dir(); - return -1; - } - _stp_unlock_transport_dir(); - dbug_trans(1, "returning 0\n"); - return 0; -} - -static void _stp_transport_fs_close(void) -{ - dbug_trans(1, "stp_transport_fs_close\n"); - - _stp_transport_data_fs_close(); - - if (__stp_module_dir) { - if (!_stp_lock_transport_dir()) { - errk("Couldn't lock transport directory.\n"); - return; - } - - debugfs_remove(__stp_module_dir); - __stp_module_dir = NULL; - - _stp_remove_root_dir(); - _stp_unlock_transport_dir(); - } -} /* NB: Accessed from tzinfo.stp tapset */ diff --git a/runtime/transport/transport.h b/runtime/transport/transport.h index 4ecf7a9f6..51723b7f5 100644 --- a/runtime/transport/transport.h +++ b/runtime/transport/transport.h @@ -5,6 +5,7 @@ * @brief Header file for stp transport */ +#include "relay_compat.h" #include "transport_msgs.h" /* The size of print buffers. 
This limits the maximum */ @@ -19,17 +20,53 @@ static unsigned _stp_nsubbufs; static unsigned _stp_subbuf_size; static pid_t _stp_target; +// flags to indicate choice of host filesystem for the relayfs +// pseudofiles; chosen within _stp_transport_fs_init +static unsigned procfs_p = 0; +static unsigned debugfs_p = 0; + static int _stp_transport_init(void); static void _stp_transport_close(void); static int _stp_lock_transport_dir(void); static void _stp_unlock_transport_dir(void); -static struct dentry *_stp_get_root_dir(void); +static struct dentry *_stp_debugfs_get_root_dir(void); +static void _stp_debugfs_remove_root_dir(void); + static struct dentry *_stp_get_module_dir(void); +static struct dentry *_stp_procfs_get_module_dir(void); +static struct dentry *_stp_debugfs_get_module_dir(void); static int _stp_transport_fs_init(const char *module_name); static void _stp_transport_fs_close(void); +static int _stp_debugfs_transport_fs_init(const char *module_name); +static void _stp_debugfs_transport_fs_close(void); +static int _stp_procfs_transport_fs_init(const char *module_name); +static void _stp_procfs_transport_fs_close(void); + +static int __stp_debugfs_relay_remove_buf_file_callback(struct dentry *dentry); +static int __stp_procfs_relay_remove_buf_file_callback(struct dentry *dentry); +struct rchan_buf; +static struct dentry * __stp_debugfs_relay_create_buf_file_callback(const char *filename, + struct dentry *parent, +#ifdef STAPCONF_RELAY_UMODE_T + umode_t mode, +#else + int mode, +#endif + struct rchan_buf *buf, + int *is_global); +static struct dentry * __stp_procfs_relay_create_buf_file_callback(const char *filename, + struct dentry *parent, +#ifdef STAPCONF_RELAY_UMODE_T + umode_t mode, +#else + int mode, +#endif + struct rchan_buf *buf, + int *is_global); + static void _stp_attach(void); static void _stp_detach(void); diff --git a/staprun/ctl.c b/staprun/ctl.c index bc330fd4e..4be68af4e 100644 --- a/staprun/ctl.c +++ b/staprun/ctl.c @@ -16,7 +16,8 @@ int 
init_ctl_channel(const char *name, int verb) { - char buf[PATH_MAX]; + char buf[PATH_MAX] = ""; // the .ctl file name + char buf2[PATH_MAX] = ""; // other tmp stuff struct statfs st; (void) verb; @@ -48,30 +49,44 @@ int init_ctl_channel(const char *name, int verb) early in staprun), or if errors out for some reason. */ #endif - if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) { + + // See if we have the .ctl file in debugfs + if (sprintf_chk(buf2, "/sys/kernel/debug/systemtap/%s/%s", + name, CTL_CHANNEL_NAME)) + return -1; + if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC && + (access (buf2, W_OK)==0)) { /* PR14245: allow subsequent operations, and if necessary, staprun->stapio forks, to reuse an fd for directory lookups (even if some parent directories have perms 0700. */ + strcpy(buf, buf2); // committed + +#ifdef HAVE_OPENAT + if (! sprintf_chk(buf2, "/sys/kernel/debug/systemtap/%s", name)) { + relay_basedir_fd = open (buf2, O_DIRECTORY | O_RDONLY); + } +#endif + } + + // PR26665: try /proc/systemtap/... also + // (STP_TRANSPORT_1 used to use this for other purposes.) + if (sprintf_chk(buf2, "/proc/systemtap/%s/%s", + name, CTL_CHANNEL_NAME)) + return -1; + if (relay_basedir_fd < 0 && (access(buf2, W_OK)==0)) { + strcpy(buf, buf2); // committed + #ifdef HAVE_OPENAT - if (! sprintf_chk(buf, "/sys/kernel/debug/systemtap/%s", name)) { - relay_basedir_fd = open (buf, O_DIRECTORY | O_RDONLY); - /* If this fails, we don't much care; the - negative return value will just keep us - looking up by name again next time. */ - /* NB: we don't plan to close this fd, so that we can pass - it across staprun->stapio fork/execs. */ + if (! 
sprintf_chk(buf2, "/proc/systemtap/%s", name)) { + relay_basedir_fd = open (buf2, O_DIRECTORY | O_RDONLY); } #endif - if (sprintf_chk(buf, "/sys/kernel/debug/systemtap/%s/%s", - name, CTL_CHANNEL_NAME)) - return -1; - /* - STP_TRANSPORT_VERSION=1 used this: - if (sprintf_chk(buf, "/proc/systemtap/%s/%s", name, CTL_CHANNEL_NAME)) - return -2; - */ } + + /* At this point, we have buf, which is the full path to the .ctl file, + and we may have a relay_basedir_fd, which is useful to pass across + staprun->stapio fork/execs. */ control_channel = open_cloexec(buf, O_RDWR, 0); dbug(2, "Opened %s (%d)\n", buf, control_channel); diff --git a/staprun/relay.c b/staprun/relay.c index 838f3fa1e..2f5f2e06a 100644 --- a/staprun/relay.c +++ b/staprun/relay.c @@ -338,6 +338,13 @@ int init_relayfs(void) dbug(2, "attempting to open %s\n", buf); relay_fd[i] = open_cloexec(buf, O_RDONLY | O_NONBLOCK, 0); } + if (relay_fd[i] < 0) { + if (sprintf_chk(buf, "/proc/systemtap/%s/trace%d", + modname, i)) + return -1; + dbug(2, "attempting to open %s\n", buf); + relay_fd[i] = open_cloexec(buf, O_RDONLY | O_NONBLOCK, 0); + } if (relay_fd[i] >= 0) { avail_cpus[cpui++] = i; } diff --git a/testsuite/buildok/trans-debugfs.stp b/testsuite/buildok/trans-debugfs.stp new file mode 100755 index 000000000..cb1a34351 --- /dev/null +++ b/testsuite/buildok/trans-debugfs.stp @@ -0,0 +1,3 @@ +#! /bin/sh + +stap -p4 -DSTAP_TRANS_DEBUGFS -e 'probe oneshot {log("hi")}' diff --git a/testsuite/buildok/trans-procfs.stp b/testsuite/buildok/trans-procfs.stp new file mode 100755 index 000000000..562bb1678 --- /dev/null +++ b/testsuite/buildok/trans-procfs.stp @@ -0,0 +1,3 @@ +#! /bin/sh + +stap -p4 -DSTAP_TRANS_PROCFS -e 'probe oneshot {log("hi")}' -- 2.43.5