* What's new in version 4.2, PRERELEASE
+- The stapbpf backend now supports procfs probes. The implementation
+ uses FIFO special files in /var/tmp/systemtap-USER/MODNAME instead
+ of the proc filesystem files.
+
- The eBPF backend now uses bpf raw tracepoints for kernel.trace("*")
probes. These have target variable arguments that match the
arguments available for the traditional linux kernel modules
FN(sprintf), \
FN(stapbpf_stat_get), \
FN(gettimeofday_ns), \
- FN(get_target),
+ FN(set_procfs_value), \
+ FN(append_procfs_value), \
+ FN(get_procfs_value),
+
const bpf_func_id BPF_FUNC_map_get_next_key = (bpf_func_id) -1;
const bpf_func_id BPF_FUNC_sprintf = (bpf_func_id) -2;
const bpf_func_id BPF_FUNC_stapbpf_stat_get = (bpf_func_id) -3;
const bpf_func_id BPF_FUNC_gettimeofday_ns = (bpf_func_id) -4;
const bpf_func_id BPF_FUNC_get_target = (bpf_func_id) -5;
-
+const bpf_func_id BPF_FUNC_set_procfs_value = (bpf_func_id) -6;
+const bpf_func_id BPF_FUNC_append_procfs_value = (bpf_func_id) -7;
+const bpf_func_id BPF_FUNC_get_procfs_value = (bpf_func_id) -8;
struct insn
{
#include <sstream>
#include <unistd.h>
#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
extern "C" {
#include <libelf.h>
emit_mov(this_prog.lookup_reg(BPF_REG_4), this_prog.new_imm(0));
this_prog.mk_call(this_ins, BPF_FUNC_map_update_elem, 4);
return;
- }
- }
+ }
+ }
err:
throw SEMANTIC_ERROR (_("unknown lvalue"), e->tok);
}
}
void
-bpf_unparser::visit_symbol (symbol *s)
+bpf_unparser::visit_symbol(symbol *s)
{
vardecl *v = s->referent;
assert (v->arity < 1);
- if (bpf_context_vardecl *c = dynamic_cast<bpf_context_vardecl*>(v))
- {
- result = emit_context_var(c);
- return;
- }
-
auto g = glob.globals.find (v);
if (g != glob.globals.end())
{
u.add_prologue();
dp->body->visit (&u);
+
if (u.in_block())
u.emit_jmp(u.get_ret0_block());
}
}
}
+ if (s.procfs_derived_probes)
+ {
+ sort_for_bpf_probe_arg_vector procfs_v;
+ sort_for_bpf(s, s.procfs_derived_probes, procfs_v);
+
+ for (auto i = procfs_v.begin(); i != procfs_v.end(); ++i)
+ {
+ t = i->first->tok;
+ program p(target_user_bpfinterp);
+ translate_probe(p, glob, i->first);
+ p.generate();
+ output_probe(eo, p, i->second, 0);
+ }
+ }
+
if (s.perf_derived_probes)
{
sort_for_bpf_probe_arg_vector perf_v;
if (e->saved_conversion_error)
session.print_error (* (e->saved_conversion_error));
- else
+ else
session.print_error (SEMANTIC_ERROR(_("unresolved target-symbol expression"), e->tok));
}
procfs.umask(UMASK).write
.ESAMPLE
+Note that there are a few differences when procfs probes are used in the stapbpf runtime.
+.RI FIFO
+special files are used instead of proc filesystem files.
+These files are created in
+/var/tmp/systemtap-USER/MODNAME.
+.RI (USER
+is the name of the user).
+Additionally, users cannot create both read and write probes on the same file.
+
.I PATH
-is the file name (relative to /proc/systemtap/MODNAME) to be created.
+is the file name (relative to /proc/systemtap/MODNAME or /var/tmp/systemtap-USER/MODNAME) to be created.
If no
.I PATH
is specified (as in the last two variants above),
.I PATH
for procfs probes; see the input probe section below.
.PP
-When a user reads /proc/systemtap/MODNAME/PATH, the corresponding
+When a user reads /proc/systemtap/MODNAME/PATH (normal runtime) or /var/tmp/systemtap-USER/MODNAME/PATH (stapbpf runtime), the corresponding
procfs
.I read
probe is triggered. The string data to be read should be assigned to
procfs("PATH").read { $value = "100\\n" }
.ESAMPLE
.PP
-When a user writes into /proc/systemtap/MODNAME/PATH, the
+When a user writes into /proc/systemtap/MODNAME/PATH (normal runtime) or /var/tmp/systemtap-USER/MODNAME/PATH (stapbpf runtime), the
corresponding procfs
.I write
probe is triggered. The data the user wrote is available in the
man_MANS = stapbpf.8
-stapbpf_SOURCES = stapbpf.cxx bpfinterp.cxx libbpf.c
+stapbpf_SOURCES = stapbpf.cxx bpfinterp.cxx libbpf.c ../util.cxx
stapbpf_CPPFLAGS = $(AM_CPPFLAGS)
stapbpf_CFLAGS = $(AM_CFLAGS)
stapbpf_CXXFLAGS = $(AM_CXXFLAGS)
CONFIG_CLEAN_VPATH_FILES =
am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man8dir)"
PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
@HAVE_BPF_DECLS_TRUE@am_stapbpf_OBJECTS = stapbpf-stapbpf.$(OBJEXT) \
@HAVE_BPF_DECLS_TRUE@ stapbpf-bpfinterp.$(OBJEXT) \
-@HAVE_BPF_DECLS_TRUE@ stapbpf-libbpf.$(OBJEXT)
+@HAVE_BPF_DECLS_TRUE@ stapbpf-libbpf.$(OBJEXT) \
+@HAVE_BPF_DECLS_TRUE@ ../stapbpf-util.$(OBJEXT)
stapbpf_OBJECTS = $(am_stapbpf_OBJECTS)
am__DEPENDENCIES_1 =
@HAVE_BPF_DECLS_TRUE@stapbpf_DEPENDENCIES = $(am__DEPENDENCIES_1)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__maybe_remake_depfiles = depfiles
-am__depfiles_remade = ./$(DEPDIR)/stapbpf-bpfinterp.Po \
- ./$(DEPDIR)/stapbpf-libbpf.Po ./$(DEPDIR)/stapbpf-stapbpf.Po
+am__depfiles_remade = ../$(DEPDIR)/stapbpf-util.Po \
+ ./$(DEPDIR)/stapbpf-bpfinterp.Po ./$(DEPDIR)/stapbpf-libbpf.Po \
+ ./$(DEPDIR)/stapbpf-stapbpf.Po
am__mv = mv -f
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
-DPKGLIBDIR='"$(pkglibexecdir)"' -DLOCALEDIR='"$(localedir)"'
AM_LDFLAGS = @PIELDFLAGS@
@HAVE_BPF_DECLS_TRUE@man_MANS = stapbpf.8
-@HAVE_BPF_DECLS_TRUE@stapbpf_SOURCES = stapbpf.cxx bpfinterp.cxx libbpf.c
+@HAVE_BPF_DECLS_TRUE@stapbpf_SOURCES = stapbpf.cxx bpfinterp.cxx libbpf.c ../util.cxx
@HAVE_BPF_DECLS_TRUE@stapbpf_CPPFLAGS = $(AM_CPPFLAGS) $(am__append_1)
@HAVE_BPF_DECLS_TRUE@stapbpf_CFLAGS = $(AM_CFLAGS)
@HAVE_BPF_DECLS_TRUE@stapbpf_CXXFLAGS = $(AM_CXXFLAGS)
clean-binPROGRAMS:
-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+../$(am__dirstamp):
+ @$(MKDIR_P) ..
+ @: > ../$(am__dirstamp)
+../$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) ../$(DEPDIR)
+ @: > ../$(DEPDIR)/$(am__dirstamp)
+../stapbpf-util.$(OBJEXT): ../$(am__dirstamp) \
+ ../$(DEPDIR)/$(am__dirstamp)
stapbpf$(EXEEXT): $(stapbpf_OBJECTS) $(stapbpf_DEPENDENCIES) $(EXTRA_stapbpf_DEPENDENCIES)
@rm -f stapbpf$(EXEEXT)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
+ -rm -f ../*.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@../$(DEPDIR)/stapbpf-util.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stapbpf-bpfinterp.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stapbpf-libbpf.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stapbpf-stapbpf.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='bpfinterp.cxx' object='stapbpf-bpfinterp.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(stapbpf_CPPFLAGS) $(CPPFLAGS) $(stapbpf_CXXFLAGS) $(CXXFLAGS) -c -o stapbpf-bpfinterp.obj `if test -f 'bpfinterp.cxx'; then $(CYGPATH_W) 'bpfinterp.cxx'; else $(CYGPATH_W) '$(srcdir)/bpfinterp.cxx'; fi`
+
+../stapbpf-util.o: ../util.cxx
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(stapbpf_CPPFLAGS) $(CPPFLAGS) $(stapbpf_CXXFLAGS) $(CXXFLAGS) -MT ../stapbpf-util.o -MD -MP -MF ../$(DEPDIR)/stapbpf-util.Tpo -c -o ../stapbpf-util.o `test -f '../util.cxx' || echo '$(srcdir)/'`../util.cxx
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ../$(DEPDIR)/stapbpf-util.Tpo ../$(DEPDIR)/stapbpf-util.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='../util.cxx' object='../stapbpf-util.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(stapbpf_CPPFLAGS) $(CPPFLAGS) $(stapbpf_CXXFLAGS) $(CXXFLAGS) -c -o ../stapbpf-util.o `test -f '../util.cxx' || echo '$(srcdir)/'`../util.cxx
+
+../stapbpf-util.obj: ../util.cxx
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(stapbpf_CPPFLAGS) $(CPPFLAGS) $(stapbpf_CXXFLAGS) $(CXXFLAGS) -MT ../stapbpf-util.obj -MD -MP -MF ../$(DEPDIR)/stapbpf-util.Tpo -c -o ../stapbpf-util.obj `if test -f '../util.cxx'; then $(CYGPATH_W) '../util.cxx'; else $(CYGPATH_W) '$(srcdir)/../util.cxx'; fi`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ../$(DEPDIR)/stapbpf-util.Tpo ../$(DEPDIR)/stapbpf-util.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='../util.cxx' object='../stapbpf-util.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(stapbpf_CPPFLAGS) $(CPPFLAGS) $(stapbpf_CXXFLAGS) $(CXXFLAGS) -c -o ../stapbpf-util.obj `if test -f '../util.cxx'; then $(CYGPATH_W) '../util.cxx'; else $(CYGPATH_W) '$(srcdir)/../util.cxx'; fi`
install-man8: $(man_MANS)
@$(NORMAL_INSTALL)
@list1=''; \
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+ -rm -f ../$(DEPDIR)/$(am__dirstamp)
+ -rm -f ../$(am__dirstamp)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
distclean: distclean-am
- -rm -f ./$(DEPDIR)/stapbpf-bpfinterp.Po
+ -rm -f ../$(DEPDIR)/stapbpf-util.Po
+ -rm -f ./$(DEPDIR)/stapbpf-bpfinterp.Po
-rm -f ./$(DEPDIR)/stapbpf-libbpf.Po
-rm -f ./$(DEPDIR)/stapbpf-stapbpf.Po
-rm -f Makefile
installcheck-am:
maintainer-clean: maintainer-clean-am
- -rm -f ./$(DEPDIR)/stapbpf-bpfinterp.Po
+ -rm -f ../$(DEPDIR)/stapbpf-util.Po
+ -rm -f ./$(DEPDIR)/stapbpf-bpfinterp.Po
-rm -f ./$(DEPDIR)/stapbpf-libbpf.Po
-rm -f ./$(DEPDIR)/stapbpf-stapbpf.Po
-rm -f Makefile
#include "bpfinterp.h"
#include "libbpf.h"
#include "../bpf-internal.h"
+#include "../util.h"
#define stapbpf_abort(reason) \
({ fprintf(stderr, _("bpfinterp.cxx:%d: %s\n"), \
return target_pid;
}
+uint64_t
+bpf_set_procfs_value(char* msg, bpf_transport_context* ctx)
+{
+  // Helper behind BPF_FUNC_set_procfs_value: replace the message stored in
+  // the transport context with a copy of the NUL-terminated string MSG.
+  // Always reports success (0).
+  assert(msg != nullptr);
+  ctx->procfs_msg.assign(msg);
+  return 0;
+}
+
+uint64_t
+bpf_append_procfs_value(char* msg, bpf_transport_context* ctx)
+{
+  // Helper behind BPF_FUNC_append_procfs_value: concatenate the
+  // NUL-terminated string MSG onto the message stored in the transport
+  // context.  Always reports success (0).
+  assert(msg != nullptr);
+  ctx->procfs_msg += msg;
+  return 0;
+}
+
+uint64_t
+bpf_get_procfs_value(bpf_transport_context* ctx)
+{
+  // Helper behind BPF_FUNC_get_procfs_value: hand back the address of the
+  // message buffer held in the transport context, encoded as a 64-bit
+  // register value.  The address refers to storage owned by
+  // ctx->procfs_msg, not to a copy.
+  const char *text = ctx->procfs_msg.data();
+  return (uint64_t) text;
+}
+
enum bpf_perf_event_ret
bpf_handle_transport_msg(void *buf, size_t size,
bpf_transport_context *ctx)
memset(regs, 0x0, sizeof(uint64_t) * MAX_BPF_REG);
const struct bpf_insn *i = insns;
static std::vector<uint64_t *> map_values;
- static std::vector<std::string> strings; // TODO: could clear on exit?
+
+ // Multiple threads accessing strings can cause concurrency issues for
+ // procfs_probes. However, the procfs_lock should prevent this and thus,
+ // clearing it on exit is unnecessary for now.
+ static std::vector<std::string> strings;
bpf_map_def *map_attrs = ctx->map_attrs;
std::vector<int> &map_fds = *ctx->map_fds;
case bpf::BPF_FUNC_get_target:
dr = bpf_get_target();
break;
+ case bpf::BPF_FUNC_set_procfs_value:
+ dr = bpf_set_procfs_value(as_str(regs[1]), ctx);
+ break;
+ case bpf::BPF_FUNC_append_procfs_value:
+ dr = bpf_append_procfs_value(as_str(regs[1]), ctx);
+ break;
+ case bpf::BPF_FUNC_get_procfs_value:
+ dr = bpf_get_procfs_value(ctx);
+ break;
default:
stapbpf_abort("unknown helper function");
}
for (uint64_t *ptr : map_values)
free(ptr);
map_values.clear(); // XXX: avoid double free
+
return result;
}
std::vector<std::string> *interned_strings;
std::unordered_map<bpf::globals::agg_idx, bpf::globals::stats_map> *aggregates;
// XXX: Could be refactored into a single global struct bpf_global_context.
+
+ // Data for procfs probes. Multiple threads will be accessing this variable.
+ // However, the procfs_lock should prevent any concurrency issues.
+ std::string procfs_msg;
// Data for an in-progress printf request:
bool in_printf;
process.*
timer.*
perf.*
+procfs.*
.ESAMPLE
In general, probes based on the kprobes, uprobes, tracepoint and perf
#include <string>
#include <thread>
#include <vector>
+#include <mutex>
#include <unistd.h>
#include <limits.h>
#include <inttypes.h>
#include <getopt.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
+#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include "bpfinterp.h"
+#include "../util.h"
extern "C" {
#include <linux/bpf.h>
static const char *module_basename;
static const char *script_name; // name of original systemtap script
static const char *module_license;
+
+static const char *user; // username
+static std::string prefix; // used to create procfs-like probe directory
+
static Elf *module_elf;
static uint32_t kernel_version;
+// Locks for accessing procfs-like probe messages
+std::mutex procfs_lock;
+
// Sized by the contents of the "maps" section.
static bpf_map_def *map_attrs;
static std::vector<int> map_fds;
static void unregister_kprobes(const size_t nprobes);
+// Description of one procfs-like probe target (a FIFO) together with the
+// BPF program(s) to run when the FIFO is read from or written to.
+struct procfsprobe_data
+{
+  std::string path;      // FIFO name, relative to the per-module directory
+  uint64_t umask;        // umask applied while the FIFO is created
+  char type;             // either 'r' (read) or 'w' (write)
+  uint64_t maxsize_val;  // maximum message size; 0 disables the size warning
+  Elf_Data* read_prog;   // program run for a read probe (null for write probes)
+  std::vector<Elf_Data*> write_prog; // programs run, in order, for write probes
+
+  // ctor for read probes: exactly one program serves the path.
+  procfsprobe_data(std::string path, uint64_t umask, char type, uint64_t maxsize_val, Elf_Data* prog)
+    : path(path), umask(umask), type(type), maxsize_val(maxsize_val), read_prog(prog)
+  { assert (type == 'r'); }
+
+  // ctor for write probes: several programs may share the path.  read_prog
+  // is explicitly nulled so that it never holds an indeterminate pointer.
+  procfsprobe_data(std::string path, uint64_t umask, char type, uint64_t maxsize_val, std::vector<Elf_Data*> prog)
+    : path(path), umask(umask), type(type), maxsize_val(maxsize_val), read_prog(nullptr), write_prog(prog)
+  { assert (type == 'w'); }
+};
+
+
struct kprobe_data
{
string args;
{ }
};
+static std::vector<procfsprobe_data> procfsprobes;
static std::vector<kprobe_data> kprobes;
static std::vector<timer_data> timers;
static std::vector<perf_data> perf_probes;
fatal("%s\n", elf_errmsg(-1));
}
-
// XXX: based on get_online_cpus()/read_cpu_range()
// in bcc src/cc/common.cc
//
kprobes.push_back(kprobe_data(type, arg, fd));
}
+// Parse the ELF section name of a procfs probe, which has the form
+//   procfsprobe/<umask>/<r|w>/<maxsize>/<path>
+// and record the probe in `procfsprobes`.  A read probe always gets its own
+// entry; write probes that share a path are merged into a single entry whose
+// programs are later run one after another.
+static void
+collect_procfsprobe(const char *name, Elf_Data* prog)
+{
+  uint64_t umask;
+  uint64_t maxsize_val;
+  char type;
+
+  // The parsed path can never be longer than the name it is extracted from,
+  // so a buffer of that length makes the unbounded %s conversion safe
+  // (a fixed PATH_MAX buffer could be overrun by a long section name).
+  std::vector<char> fifoname(strlen(name) + 1);
+
+  // SCNu64 matches uint64_t on every ABI, unlike a hard-coded %lu.
+  int res = sscanf(name, "procfsprobe/%" SCNu64 "/%c/%" SCNu64 "/%s",
+                   &umask, &type, &maxsize_val, fifoname.data());
+
+  if (res != 4)
+    fatal("unable to parse name of probe: %s", name);
+
+  // Reject anything but the two known probe kinds instead of silently
+  // treating it as a write probe.
+  if (type != 'r' && type != 'w')
+    fatal("unknown procfs probe type '%c' in probe name: %s", type, name);
+
+  std::string path(fifoname.data());
+
+  if (type == 'r')
+    {
+      procfsprobes.push_back(procfsprobe_data(path, umask, type, maxsize_val, prog));
+      return;
+    }
+
+  // Check if a write probe with the same path already exists
+  for (unsigned i = 0; i < procfsprobes.size(); i++)
+    if (procfsprobes[i].path == path && procfsprobes[i].type == 'w')
+      {
+        procfsprobes[i].write_prog.push_back(prog);
+        return;
+      }
+
+  std::vector<Elf_Data*> progs;
+  progs.push_back(prog);
+  procfsprobes.push_back(procfsprobe_data(path, umask, type, maxsize_val, progs));
+}
+
static void
collect_uprobe(const char *name, unsigned name_idx, unsigned fd_idx)
{
if (ehdr == NULL)
fatal_elf();
+ /* Get username and set directory prefix: */
+ user = getlogin();
+
+ if (!user)
+ fatal("an error occured while retrieving username. %s.\n", strerror(errno));
+
+ // TODO: fix script_name so we can directly use it here
+
+ std::string module_name = std::string(module_basename);
+ module_name = module_name.substr(0, module_name.size() - 3);
+
+ prefix = "/var/tmp/systemtap-" + std::string(user) + "/" + module_name + "/";
+
// Byte order should match the host, since we're loading locally.
{
const char *end_str;
unsigned begin_idx = 0;
unsigned end_idx = 0;
+ std::vector<unsigned> procfsprobes_idx;
+
// First pass to identify special sections, and make sure
// all data is readable.
for (unsigned i = 1; i < shnum; ++i)
begin_idx = i;
else if (strcmp(shname, "stap_end") == 0)
end_idx = i;
+ else if (strncmp(shname, "procfs", strlen("procfs")) == 0) {
+ // procfs probes have a "procfs" prefix in their names, we don't
+ // use normal strcmp as the full shname includes args
+ procfsprobes_idx.push_back(i);
+ }
}
// Two special sections are not optional.
script_name = static_cast<char *>(sh_data[script_name_idx]->d_buf);
else
script_name = "<unknown>";
+
if (version_idx != 0)
{
unsigned long long size = shdrs[version_idx]->sh_size;
prog_fds[i] = prog_load(sh_data[i], sh_name[i]);
}
- // Remember begin and end probes.
+ // Remember begin, end and procfs-like probes.
if (begin_idx)
{
Elf64_Shdr *shdr = shdrs[begin_idx];
prog_end = sh_data[end_idx];
}
+ for (unsigned i = 0; i < procfsprobes_idx.size(); ++i)
+ {
+ unsigned actual_idx = procfsprobes_idx[i];
+
+ Elf64_Shdr *shdr = shdrs[actual_idx];
+ if (shdr->sh_flags & SHF_EXECINSTR)
+ collect_procfsprobe(sh_name[actual_idx], sh_data[actual_idx]);
+ }
+
// Record all kprobes.
if (kprobes_idx != 0)
{
return;
}
+
+// Thread body serving one procfs read probe.  Each time the FIFO can be
+// opened for writing, the probe's BPF program is interpreted and the message
+// it stored in the transport context is written into the FIFO.  Never
+// returns; terminates the process via fatal() if the FIFO has disappeared.
+static void
+procfs_read_event_loop (procfsprobe_data* data, bpf_transport_context* uctx)
+{
+  std::string path_s = prefix + data->path;
+  const char* path = path_s.c_str();
+
+  Elf_Data* prog = data->read_prog;
+
+  while (true)
+    {
+      int fd = open(path, O_WRONLY);
+
+      if (fd == -1)
+        {
+          if (errno == ENOENT)
+            fatal("an error occured while opening procfs fifo (%s). %s.\n", path, strerror(errno));
+
+          fprintf(stderr, "WARNING: an error occurred while opening procfs fifo (%s). %s.\n",
+                  path, strerror(errno));
+          continue;
+        }
+
+      std::string msg;
+
+      {
+        // Scoped guard: the lock is released even if the interpreter throws.
+        std::lock_guard<std::mutex> guard(procfs_lock);
+
+        // Run the probe and collect the message.
+        bpf_interpret(prog->d_size / sizeof(bpf_insn), static_cast<bpf_insn *>(prog->d_buf), uctx);
+
+        // Make a copy of the message while the shared context is still ours.
+        msg = uctx->procfs_msg;
+      }
+
+      // %zu / PRIu64 match size_t / uint64_t exactly; the previous %ld did not.
+      if (data->maxsize_val && (msg.size() > data->maxsize_val - 1))
+        fprintf(stderr, "WARNING: procfs message size (%zu) exceeds specified maximum size (%" PRIu64 ").\n",
+                msg.size() + 1, data->maxsize_val);
+
+      // The terminating NUL is sent along with the text (size() + 1 bytes).
+      if (write(fd, msg.c_str(), msg.size() + 1) == -1)
+        {
+          fprintf(stderr, "WARNING: an error occurred while writing to procfs fifo (%s). %s.\n",
+                  path, strerror(errno));
+          (void) close(fd);
+          continue;
+        }
+
+      (void) close(fd);
+
+      // We're not sure at this point whether the read end of the pipe has closed. We
+      // perform a small open hack to spin until read end of the pipe has closed:
+      // a non-blocking write-only open keeps succeeding while a reader is attached.
+      do {
+
+        fd = open(path, O_WRONLY | O_NONBLOCK);
+
+        if (fd != -1) close(fd);
+
+      } while (fd != -1);
+    }
+}
+
+
+// Thread body serving the write probes that share one FIFO.  Each time a
+// writer opens the FIFO, everything it writes is collected, stored as the
+// procfs message in the transport context, and every probe program
+// registered for the path is interpreted in turn.  Never returns; terminates
+// the process via fatal() if the FIFO has disappeared.
+static void
+procfs_write_event_loop (procfsprobe_data* data, bpf_transport_context* uctx)
+{
+  std::string path_s = prefix + data->path;
+  const char* path = path_s.c_str();
+
+  std::vector<Elf_Data*> prog = data->write_prog;
+
+  while (true)
+    {
+      int fd = open(path, O_RDONLY);
+
+      if (fd == -1)
+        {
+          if (errno == ENOENT)
+            fatal("an error occured while opening procfs fifo (%s). %s.\n", path, strerror(errno));
+
+          fprintf(stderr, "WARNING: an error occurred while opening procfs fifo (%s). %s.\n",
+                  path, strerror(errno));
+          continue;
+        }
+
+      std::string msg;
+
+      char buffer_feed[1024];
+      ssize_t bytes_read;
+
+      do {
+
+        bytes_read = read(fd, buffer_feed, sizeof(buffer_feed));
+
+        if (bytes_read == -1)
+          fprintf(stderr, "WARNING: an error occurred while reading from procfs fifo (%s). %s.\n",
+                  path, strerror(errno));
+
+        // read() does not NUL-terminate the buffer, so append exactly the
+        // number of bytes it reported rather than treating it as a C string.
+        if (bytes_read > 0)
+          msg.append(buffer_feed, static_cast<size_t>(bytes_read));
+
+      } while (bytes_read > 0);
+
+      (void) close(fd);
+
+      // Scoped guard: the lock is released even if the interpreter throws.
+      std::lock_guard<std::mutex> guard(procfs_lock);
+
+      uctx->procfs_msg = msg;
+
+      // Now that we have the message, run the probes serially.
+      for (unsigned i = 0; i < prog.size(); ++i)
+        bpf_interpret(prog[i]->d_size / sizeof(bpf_insn), static_cast<bpf_insn *>(prog[i]->d_buf), uctx);
+    }
+}
+
+
+// Remove the FIFOs created for procfs-like probes and then the directory
+// that held them.  Failures are reported as warnings only.
+static void
+procfs_cleanup()
+{
+  for (const procfsprobe_data &entry : procfsprobes)
+    {
+      const std::string fifo_s = prefix + entry.path;
+      const char* fifo = fifo_s.c_str();
+      if (remove_file_or_dir(fifo))
+        fprintf(stderr, "WARNING: an error occurred while deleting a file (%s). %s.\n", fifo, strerror(errno));
+    }
+
+  // The directory is only touched when at least one probe was collected.
+  if (procfsprobes.empty())
+    return;
+
+  const char* dir = prefix.c_str();
+  if (remove_file_or_dir(dir))
+    fprintf(stderr, "WARNING: an error ocurred while deleting a directory (%s). %s.\n", dir, strerror(errno));
+}
+
+
+// Set up every collected procfs-like probe: register the exit-time cleanup,
+// create the per-module directory, create one FIFO per probe entry and start
+// a detached thread that services it.
+static void
+procfs_spawn(bpf_transport_context* uctx)
+{
+  // Enable cleanup routine.
+  if (atexit(procfs_cleanup) != 0)
+    fatal("an error occurred while setting up procfs cleaner. %s.\n", strerror(errno));
+
+  // Create directory for procfs-like probes.
+  if (!procfsprobes.empty() && create_dir(prefix.c_str()))
+    fatal("an error occurred while making procfs directory. %s.\n", strerror(errno));
+
+  // Create all of the fifos used for procfs-like probes and spawn threads.
+  for (procfsprobe_data &entry : procfsprobes)
+    {
+      procfsprobe_data* data = &entry;
+      const bool is_read = (data->type == 'r');
+      const std::string fifo = prefix + data->path;
+
+      // The probe's umask is in force only while the FIFO is created; the
+      // previous process umask is put back right afterwards.
+      uint64_t saved_mask = umask(data->umask);
+
+      // Read probes get a read-only FIFO, write probes a write-only one.
+      mode_t mode = is_read ? 0444 : 0222;
+
+      if (mkfifo(fifo.c_str(), mode) == -1)
+        fatal("an error occured while making procfs fifos. %s.\n", strerror(errno));
+
+      // TODO: Could set the owner/group of the fifo to the effective user.
+
+      umask(saved_mask);
+
+      if (is_read)
+        std::thread(procfs_read_event_loop, data, uctx).detach();
+      else
+        std::thread(procfs_write_event_loop, data, uctx).detach();
+    }
+}
+
+
static void
usage(const char *argv0)
{
// PR22330: Listen for perf_events:
std::thread(perf_event_loop, pthread_self()).detach();
+ // Spawn all procfs threads.
+ procfs_spawn(&uctx);
+
// Now that the begin probe has run and the perf_event listener is active, enable the kprobes.
ioctl(group_fd, PERF_EVENT_IOC_ENABLE, 0);
unregister_tracepoints(tracepoint_probes.size());
unregister_raw_tracepoints(raw_tracepoint_probes.size());
+ // Clean procfs-like probe files.
+ procfs_cleanup();
+
// We are now running exit probes, so ^C should exit immediately:
exit_phase = 1;
signal(SIGINT, (sighandler_t)sigint); // restore previously ignored signal
int64_t umask;
string variable_name;
-
procfs_derived_probe (systemtap_session &, probe* p, probe_point* l, string ps, bool w, int64_t m, int64_t umask);
void join_group (systemtap_session& s);
struct procfs_derived_probe_group: public generic_dpg<procfs_derived_probe>
{
+ friend bool sort_for_bpf(systemtap_session& s,
+ procfs_derived_probe_group *pr,
+ sort_for_bpf_probe_arg_vector &v);
+
private:
map<string, procfs_probe_set*> probes_by_path;
typedef map<string, procfs_probe_set*>::iterator p_b_p_iterator;
procfs_derived_probe_group () :
has_read_probes(false), has_write_probes(false) {}
- void enroll (procfs_derived_probe* probe);
+ void enroll (procfs_derived_probe* probe, systemtap_session& s);
void emit_kernel_module_init (systemtap_session& s);
void emit_kernel_module_exit (systemtap_session& s);
void emit_module_decls (systemtap_session& s);
void emit_module_exit (systemtap_session& s);
};
+// Flatten the procfs probe group into (probe, section-name) pairs for the
+// BPF backend.  Each section name has the form
+//   procfsprobe/<umask>/<r|w>/<maxsize>/<path>
+// Returns false when there is no procfs probe group, true otherwise.
+bool
+sort_for_bpf(systemtap_session& s __attribute__ ((unused)),
+             procfs_derived_probe_group *pr,
+             sort_for_bpf_probe_arg_vector &v)
+{
+  if (pr == NULL)
+    return false;
+
+  for (auto entry = pr->probes_by_path.begin(); entry != pr->probes_by_path.end(); ++entry)
+    {
+      const string &path = entry->first;
+
+      // At most one read probe exists per path.
+      if (procfs_derived_probe *reader = entry->second->read_probe)
+        {
+          stringstream name;
+          name << "procfsprobe/" << reader->umask << "/r/" << reader->maxsize_val << "/" << path;
+          v.push_back(std::pair<procfs_derived_probe *, std::string> (reader, name.str()));
+        }
+
+      // Any number of write probes may share the path; each gets an entry.
+      vector<procfs_derived_probe*> writers = entry->second->write_probes;
+
+      for (size_t k = 0; k < writers.size(); ++k)
+        {
+          procfs_derived_probe *writer = writers[k];
+          stringstream name;
+          name << "procfsprobe/" << writer->umask << "/w/" << writer->maxsize_val << "/" << path;
+          v.push_back(std::pair<procfs_derived_probe *, std::string> (writer, name.str()));
+        }
+    }
+
+  return true;
+}
struct procfs_var_expanding_visitor: public var_expanding_visitor
{
ec->code = string("#include \"procfs-probes.h\"");
s.embeds.push_back(ec);
}
- s.procfs_derived_probes->enroll (this);
+ s.procfs_derived_probes->enroll (this, s);
this->group = s.procfs_derived_probes;
}
void
-procfs_derived_probe_group::enroll (procfs_derived_probe* p)
+procfs_derived_probe_group::enroll (procfs_derived_probe* p, systemtap_session& s)
{
procfs_probe_set *pset;
{
pset = probes_by_path[p->path];
+ // You can't have read and write probes for the same path in the bpf runtime.
+ if (s.runtime_mode == systemtap_session::bpf_runtime &&
+ ((p->write && pset->read_probe) || (! p->write && pset->write_probes.size() > 0)))
+ throw SEMANTIC_ERROR(_("both read and write procfs probes cannot exist for the same procfs path \"")
+ + p->path + "\" in the bpf runtime.");
+
// You can only specify 1 read probe.
if (! p->write && pset->read_probe != NULL)
throw SEMANTIC_ERROR(_("only one read procfs probe can exist for procfs path \"") + p->path + "\"");
bool lvalue = is_active_lvalue(e);
if (write_probe && lvalue)
throw SEMANTIC_ERROR(_("procfs $value variable is read-only in a procfs write probe"), e->tok);
- else if (! write_probe && ! lvalue)
- throw SEMANTIC_ERROR(_("procfs $value variable cannot be read in a procfs read probe"), e->tok);
+ else if (! write_probe && ! lvalue)
+ throw SEMANTIC_ERROR(_("procfs $value variable cannot be read in a procfs read probe"), e->tok);
if (e->addressof)
throw SEMANTIC_ERROR(_("cannot take address of procfs variable"), e->tok);
// Remember that we've seen a target variable.
target_symbol_seen = true;
+
+ // If we're in the bpf runtime, we simply replace the target variable with helper
+ // functions in the tapset library which will act as an interfacing mechanism.
+ if (sess.runtime_mode == systemtap_session::bpf_runtime)
+ {
+ functioncall* n = new functioncall;
+ n->tok = e->tok;
+
+ if (!lvalue)
+ n->function = "_get_procfs_value";
+ else
+ {
+ if (*op == "=")
+ n->function = "_set_procfs_value";
+ else if (*op == ".=")
+ n->function = "_append_procfs_value";
+ else
+ throw SEMANTIC_ERROR (_("Only the following assign operators are"
+ " implemented on procfs read target variables:"
+ " '=', '.='"), e->tok);
+ provide_lvalue_call (n);
+ }
+
+ provide (n);
+ return;
+ }
// Synthesize a function.
functiondecl *fdecl = new functiondecl;
throw SEMANTIC_ERROR (_("procfs path cannot be relative (and contain '.' or '..')"), location->components.front()->tok);
}
+
+
if (!(has_read ^ has_write))
throw SEMANTIC_ERROR (_("need read/write component"), location->components.front()->tok);
--- /dev/null
+// target tapset
+// Copyright (C) 2019 Red Hat Inc.
+//
+// This file is part of systemtap, and is free software. You can
+// redistribute it and/or modify it under the terms of the GNU General
+// Public License (GPL); either version 2, or (at your option) any
+// later version.
+
+// TODO: get 'call' instruction to handle the functions below
+
+/**
+ * function _set_procfs_value - Used to set the message
+ * from a procfs-like probe.
+ *
+ * @msg: The string that becomes the probe's message.
+ *
+ * Description: This function always returns 0.
+ */
+
+function _set_procfs_value:long (msg:string)
+%{/* bpf */
+ 0xbf, 1, $msg, -, -; /* mov r1, $msg */
+ 0x85, 0, 0, 0, -6; /* call BPF_FUNC_set_procfs_value (helper id -6) */
+ 0xbf, $$, 0, -, -; /* return r0 */
+%}
+
+/**
+ * function _append_procfs_value - Used to append to
+ * the message for a procfs-like probe.
+ *
+ * @append: The string to add to the end of the probe's message.
+ *
+ * Description: This function always returns 0.
+ */
+
+function _append_procfs_value:long (append:string)
+%{/* bpf */
+ 0xbf, 1, $append, -, -; /* mov r1, $append */
+ 0x85, 0, 0, 0, -7; /* call BPF_FUNC_append_procfs_value (helper id -7) */
+ 0xbf, $$, 0, -, -; /* return r0 */
+%}
+
+/**
+ * function _get_procfs_value - Used to get the message
+ * for a procfs-like probe.
+ *
+ * Description: This function takes no arguments and returns
+ * a pointer to the message, typed as a string.
+ */
+
+function _get_procfs_value:string ()
+%{/* bpf */
+ 0x85, 0, 0, 0, -8; /* call BPF_FUNC_get_procfs_value (helper id -8) */
+ 0xbf, $$, 0, -, -; /* return r0 */
+%}
bool sort_for_bpf(systemtap_session& s,
generic_kprobe_derived_probe_group *ge,
sort_for_bpf_probe_arg_vector &v);
+bool sort_for_bpf(systemtap_session& s,
+ procfs_derived_probe_group *pr,
+ sort_for_bpf_probe_arg_vector &v);
bool sort_for_bpf(systemtap_session& s,
hrtimer_derived_probe_group *hr,
timer_derived_probe_group *t,
--- /dev/null
+# Test cases for procfs probes with bpf runtime
+
+set test "PROCFS_BPF"
+
+if {![installtest_p]} { untested $test; return }
+
+# Read the whole content of PATH (dropping one trailing newline) and return
+# it.  Records a pass on success; on an open failure records a fail carrying
+# the error text and returns "<unknown>".
+proc proc_read_value {test path} {
+    if {[catch {open $path RDONLY} channel]} {
+        fail "$test $channel"
+        return "<unknown>"
+    }
+
+    set value [read -nonewline $channel]
+    close $channel
+    pass "$test read $value"
+    return $value
+}
+
+# Write VALUE followed by a newline into PATH.  Records a pass on success,
+# or a fail carrying the error text when PATH cannot be opened.
+proc proc_write_value {test path value} {
+    if {[catch {open $path WRONLY} channel]} {
+        fail "$test $channel"
+        return
+    }
+
+    puts $channel $value
+    close $channel
+    pass "$test wrote $value"
+}
+
+# Exercise the procfs-like FIFOs of the running script: read the initial
+# value, then repeatedly write a new value through the "other" FIFO and check
+# that the "command" FIFO reports it back.  Returns 0 when done.
+proc proc_read_write {} {
+    global test
+
+    # The FIFO directory name contains the login name, which this proc never
+    # received from anywhere before.  logname reports the login name; fall
+    # back to Tcl's notion of the current user when it is unavailable.
+    if {[catch {exec logname} user]} {
+        set user $::tcl_platform(user)
+    }
+
+    # NOTE(review): stapbpf appears to build the directory from the module
+    # file name with its last three characters removed, which suggests the
+    # directory is "$test" rather than "$test.bo" -- confirm.
+    set path_read "/var/tmp/systemtap-$user/$test.bo/command"
+    set path_write "/var/tmp/systemtap-$user/$test.bo/other"
+
+    # read the initial value, which should be '100'
+    set value [proc_read_value $test $path_read]
+    if { $value == "100" } {
+        pass "$test received correct initial value"
+    } else {
+        fail "$test received incorrect initial value: $value"
+    }
+
+    # write a new value of '200'
+    proc_write_value $test $path_write "200"
+
+    # make sure it got set to '200'
+    set value [proc_read_value $test $path_read]
+    if { $value == "200" } {
+        pass "$test received correct value: 200"
+    } else {
+        fail "$test received incorrect value: $value"
+    }
+
+    # read it again to make sure nothing changed
+    set value [proc_read_value "$test again" $path_read]
+    if { $value == "200" } {
+        pass "$test received correct value: 200 again"
+    } else {
+        fail "$test received incorrect value: $value again"
+    }
+
+    # write a new value of 'hello'
+    proc_write_value $test $path_write "hello"
+
+    # make sure it got set to 'hello'
+    set value [proc_read_value $test $path_read]
+    if { $value == "hello" } {
+        pass "$test received correct value: hello"
+    } else {
+        fail "$test received incorrect value: $value"
+    }
+
+    # write a new value of 'goodbye'
+    proc_write_value $test $path_write "goodbye"
+
+    # make sure it got set to 'goodbye'
+    set value [proc_read_value $test $path_read]
+    if { $value == "goodbye" } {
+        pass "$test received correct value: goodbye"
+    } else {
+        fail "$test received incorrect value: $value"
+    }
+
+    return 0;
+}
+
+# The script starts with a value of "100". If the user writes into
+# the "other" FIFO, that value is returned by the next read of the
+# "command" FIFO.
+
+set systemtap_script {
+ global saved_value
+
+ probe procfs.read {
+ $value = saved_value
+ }
+
+ probe procfs("other").write {
+ saved_value = $value
+ }
+
+ probe begin {
+ saved_value = "100\n"
+ printf("systemtap starting probe\n")
+ }
+
+ probe end {
+ printf("systemtap ending probe\n")
+ printf("final value = %s", saved_value)
+ }
+}
+
+
+# test procfs probes
+set output_string "\\mfinal value = goodbye\\M\r\n"
+stap_run $test proc_read_write $output_string --bpf -e $systemtap_script -m $test
+
+exec /bin/rm -f ${test}.bo