From 1d0e697d7ceebea0b79cb5e34bc0ce477b65e252 Mon Sep 17 00:00:00 2001
From: David Smith <dsmith@redhat.com>
Date: Wed, 31 Oct 2012 10:45:41 -0500
Subject: [PATCH] (PR14571 partial fix) Improve dyninst multithreading.

* translate.cxx (c_unparser::emit_common_header): Don't declare contexts,
  let the runtime handle it.
  (c_unparser::emit_module_init): Instead of allocating/freeing contexts
  directly, call the runtime functions
  _stp_runtime_contexts_alloc()/_stp_runtime_contexts_free().
  (c_unparser::emit_module_exit): Instead of waiting on contexts
  directly, call the runtime function _stp_runtime_context_wait().
* tapsets.cxx (common_probe_entryfn_prologue): Updated with new method
  of getting context in probe prologues. For dyninst, handle a NULL
  context.
  (common_probe_entryfn_epilogue): Call _stp_runtime_entryfn_put_context()
  in probe epilogue.
* runtime/dyninst/runtime_context.h: New file.
* runtime/linux/runtime_context.h: Ditto.
* runtime/common_probe_context.h: Added dyninst fields.
* runtime/dyninst/map_runtime.h: Make map locking optional.
* runtime/dyninst/print.c (_stp_print_init): Added multithreading support.
* runtime/dyninst/runtime.h: Remove the stapdyn_big_dumb_lock.
  (stp_dyninst_session_init): Call _stp_runtime_contexts_init() and
  _stp_print_init().
  (stp_dyninst_session_exit): Call _stp_print_cleanup().
* runtime/dyninst/stat_runtime.h: Make stat locking optional.
* runtime/stat.h: Ditto.
* runtime/linux/map_runtime.h (_stp_map_for_each_cpu): Remove unneeded
  macro.
* runtime/linux/runtime.h: Removed unused macros.
* runtime/linux/stat_runtime.h (_stp_stat_initialize_locks): Switched
  back to standard lkm macros.
* runtime/map-stat.c: Ditto.
* runtime/map.c: Ditto.
* runtime/pmap-gen.c: Ditto.
* runtime/stat.c: Ditto.
* runtime/map.h: Make locking optional.
* runtime/runtime.h: Added forward declarations for runtime_context.h.
* runtime/runtime_context.h: Include the correct runtime's
  runtime_context.h.
* runtime/vsprintf.c: Update method of getting context.
---
 runtime/common_probe_context.h    |   9 ++
 runtime/dyninst/map_runtime.h     |  16 +--
 runtime/dyninst/print.c           | 108 +++++++++++++---
 runtime/dyninst/runtime.h         |  40 +++---
 runtime/dyninst/runtime_context.h | 201 ++++++++++++++++++++++++++++++
 runtime/dyninst/stat_runtime.h    |  53 +++-----
 runtime/linux/map_runtime.h       |   1 -
 runtime/linux/runtime.h           |   3 -
 runtime/linux/runtime_context.h   | 123 ++++++++++++++++++
 runtime/linux/stat_runtime.h      |   3 +-
 runtime/map-stat.c                |   4 +-
 runtime/map.c                     |  16 +--
 runtime/map.h                     |   4 +-
 runtime/pmap-gen.c                |   6 +-
 runtime/runtime.h                 |   7 ++
 runtime/runtime_context.h         |   6 +
 runtime/stat.c                    |   6 +-
 runtime/stat.h                    |   4 +-
 runtime/vsprintf.c                |  12 +-
 tapsets.cxx                       |  24 +++-
 translate.cxx                     | 123 +++--------------
 21 files changed, 536 insertions(+), 233 deletions(-)
 create mode 100644 runtime/dyninst/runtime_context.h
 create mode 100644 runtime/linux/runtime_context.h

diff --git a/runtime/common_probe_context.h b/runtime/common_probe_context.h
index d9fd03b02..b66faca64 100644
--- a/runtime/common_probe_context.h
+++ b/runtime/common_probe_context.h
@@ -2,6 +2,15 @@
 Defines all common fields and probe flags for struct context.
 Available to C-based probe handlers as fields of the CONTEXT ptr. */
 
+#ifdef __DYNINST__
+/* The index of this context structure within the array of allocated
+   context structures. */
+int data_index;
+
+/* The lock for this context structure. */
+pthread_mutex_t lock;
+#endif
+
 /* Used to indicate whether a probe context is in use.
 Tested in the code entering the probe setup by common_probe_entry_prologue
 and cleared by the common_probe_entry_epilogue code. When an early error
diff --git a/runtime/dyninst/map_runtime.h b/runtime/dyninst/map_runtime.h
index 5488847b1..871716953 100644
--- a/runtime/dyninst/map_runtime.h
+++ b/runtime/dyninst/map_runtime.h
@@ -11,16 +11,15 @@
 #ifndef _STAPDYN_MAP_RUNTIME_H_
 #define _STAPDYN_MAP_RUNTIME_H_
 
-/* For dyninst, NEED_MAP_LOCKS is always on since we don't have real
-   per-cpu data. */
-#ifndef NEED_MAP_LOCKS
-#define NEED_MAP_LOCKS
-#endif
-
 #include <pthread.h>
 
+#ifdef NEED_MAP_LOCKS
 #define MAP_LOCK(m)	pthread_mutex_lock(&(m)->lock)
 #define MAP_UNLOCK(m)	pthread_mutex_unlock(&(m)->lock)
+#else
+#define MAP_LOCK(m)	do {} while (0)
+#define MAP_UNLOCK(m)	do {} while (0)
+#endif
 
 /* Note that pthread_mutex_trylock()'s return value is opposite of the
  * kernel's spin_trylock(), so we invert the return value of
@@ -32,21 +31,24 @@
 
 static int _stp_map_initialize_lock(MAP m)
 {
+#ifdef NEED_MAP_LOCKS
 	int rc;
 
 	if ((rc = pthread_mutex_init(&m->lock, NULL)) != 0) {
 		_stp_error("Couldn't initialize map mutex: %d\n", rc);
 		return rc;
 	}
+#endif
 	return 0;
 }
 
 static void _stp_map_destroy_lock(MAP m)
 {
+#ifdef NEED_MAP_LOCKS
 	(void)pthread_mutex_destroy(&m->lock);
+#endif
 }
 
-#define _stp_map_for_each_cpu(cpu) _stp_stat_for_each_cpu(cpu)
 #define _stp_map_per_cpu_ptr(m, cpu)	&((m)[(cpu)])
 
 #endif /* _STAPDYN_MAP_RUNTIME_H_ */
diff --git a/runtime/dyninst/print.c b/runtime/dyninst/print.c
index eade758f6..bc71b3fdd 100644
--- a/runtime/dyninst/print.c
+++ b/runtime/dyninst/print.c
@@ -11,48 +11,118 @@
 #ifndef _STAPDYN_PRINT_C_
 #define _STAPDYN_PRINT_C_
 
+#ifdef STP_BULKMODE
+#error "Bulk mode output (percpu files) not supported for --runtime=dyninst"
+#endif
+#ifdef STP_USE_RING_BUFFER
+#error "Ring buffer output not supported for --runtime=dyninst"
+#endif
+#if defined(RELAY_GUEST) || defined(RELAY_HOST)
+#error "Relay host/guest output not supported for --runtime=dyninst"
+#endif
+
 #include "vsprintf.c"
 
-static size_t _stp_print_buf_alloc = 0;
-static size_t _stp_print_buf_used = 0;
-static void * _stp_print_buf;
+typedef struct {
+	size_t buf_alloc;
+	size_t buf_used;
+	void *buf;
+} _stp_pbuf_t;
+
+static _stp_pbuf_t *_stp_pbuf = NULL;
 
 static void _stp_print_kernel_info(char *vstr, int ctx, int num_probes)
 {
 	// nah...
 }
 
+/* Forward declaration: _stp_print_init()'s error path calls this
+   before its definition below. */
+static void _stp_print_cleanup(void);
+
+static int _stp_print_init(void)
+{
+	int i;
+
+	/* Allocate an array: _stp_pbuf_t[_stp_runtime_num_contexts] */
+	_stp_pbuf = calloc(_stp_runtime_num_contexts, sizeof(_stp_pbuf_t));
+	if (_stp_pbuf == NULL)
+		return -ENOMEM;
+
+	/* Let's go ahead and pre-allocate the buffers. Note they
+	   might grow later. */
+	for (i = 0; i < _stp_runtime_num_contexts; i++) {
+		_stp_pbuf[i].buf_alloc = STP_BUFFER_SIZE;
+		_stp_pbuf[i].buf = malloc(STP_BUFFER_SIZE);
+		if (_stp_pbuf[i].buf == NULL) {
+			_stp_print_cleanup();
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static void _stp_print_cleanup(void)
+{
+	int i;
+
+	if (_stp_pbuf == NULL)
+		return;
+
+	for (i = 0; i < _stp_runtime_num_contexts; i++) {
+		if (_stp_pbuf[i].buf)
+			free(_stp_pbuf[i].buf);
+	}
+	if (_stp_pbuf) {
+		free(_stp_pbuf);
+		_stp_pbuf = NULL;
+	}
+}
+
 static inline void _stp_print_flush(void)
 {
+	_stp_pbuf_t *pbuf;
+
 	fflush(_stp_err);
-	if (_stp_print_buf_used) {
-		fwrite(_stp_print_buf, _stp_print_buf_used, 1, _stp_out);
+
+	pbuf = &_stp_pbuf[_stp_runtime_get_data_index()];
+	if (pbuf->buf_used) {
+		fwrite(pbuf->buf, pbuf->buf_used, 1, _stp_out);
 		fflush(_stp_out);
-		_stp_print_buf_used = 0;
+		pbuf->buf_used = 0;
 	}
 }
 
 static void * _stp_reserve_bytes (int numbytes)
 {
-	size_t size = _stp_print_buf_used + numbytes;
-	if (size > _stp_print_buf_alloc) {
-		void *buf = realloc(_stp_print_buf, size);
+	_stp_pbuf_t *pbuf;
+	size_t size;
+
+	pbuf = &_stp_pbuf[_stp_runtime_get_data_index()];
+	size = pbuf->buf_used + numbytes;
+	if (size > pbuf->buf_alloc) {
+		/* XXX: Should the new size be a multiple of
+		   STP_BUFFER_SIZE? */
+		void *buf = realloc(pbuf->buf, size);
 		if (!buf)
 			return NULL;
-		_stp_print_buf = buf;
-		_stp_print_buf_alloc = size;
+		pbuf->buf = buf;
+		pbuf->buf_alloc = size;
 	}
-	void *ret = _stp_print_buf + _stp_print_buf_used;
-	_stp_print_buf_used += numbytes;
+	void *ret = pbuf->buf + pbuf->buf_used;
+	pbuf->buf_used += numbytes;
 	return ret;
 }
 
 static void _stp_unreserve_bytes (int numbytes)
 {
-	if (unlikely(numbytes <= 0 || numbytes > _stp_print_buf_used))
+	_stp_pbuf_t *pbuf;
+
+	pbuf = &_stp_pbuf[_stp_runtime_get_data_index()];
+	if (unlikely(numbytes <= 0 || numbytes > pbuf->buf_used))
 		return;
-	_stp_print_buf_used -= numbytes;
+	pbuf->buf_used -= numbytes;
 }
 
 static void _stp_printf (const char *fmt, ...)
@@ -65,8 +131,16 @@ static void _stp_printf (const char *fmt, ...)
 
 static void _stp_print (const char *str)
 {
-	_stp_printf("%s", str);
+	_stp_printf("%s", str);
 }
 
-#endif /* _STAPDYN_PRINT_C_ */
+static void _stp_print_char (const char c)
+{
+	char *p = _stp_reserve_bytes(1);
+
+	if (p) {
+		*p = c;
+	}
+}
 
+#endif /* _STAPDYN_PRINT_C_ */
diff --git a/runtime/dyninst/runtime.h b/runtime/dyninst/runtime.h
index 5ec5d6072..80e6339d3 100644
--- a/runtime/dyninst/runtime.h
+++ b/runtime/dyninst/runtime.h
@@ -85,34 +85,17 @@ static inline int pseudo_atomic_cmpxchg(atomic_t *v, int oldval, int newval)
 	return __sync_val_compare_and_swap(&(v->counter), oldval, newval);
 }
 
-
-static pthread_mutex_t stapdyn_big_dumb_lock = PTHREAD_MUTEX_INITIALIZER;
-
-static inline void _stp_runtime_entryfn_prologue(void)
-{
-	pthread_mutex_lock(&stapdyn_big_dumb_lock);
-}
-
-static inline void _stp_runtime_entryfn_epilogue(void)
-{
-	pthread_mutex_unlock(&stapdyn_big_dumb_lock);
-}
-
-
 #include "linux_defs.h"
 
 #define MODULE_DESCRIPTION(str)
 #define MODULE_LICENSE(str)
 #define MODULE_INFO(tag,info)
 
-/* XXX for now, act like uniprocessor... */
-#define NR_CPUS 1
-#define num_online_cpus() 1
-#define smp_processor_id() 0
-#define get_cpu() 0
-#define put_cpu() 0
-#define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
-#define stp_for_each_cpu(cpu) for_each_possible_cpu((cpu))
+/* Semi-forward declaration from runtime_context.h, needed by stat.c. */
+static int _stp_runtime_num_contexts;
+
+#define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < _stp_runtime_num_contexts; (cpu)++)
+
 #define yield() sched_yield()
 
 #define access_ok(type, addr, size) 1
@@ -197,14 +180,24 @@ static void stp_dyninst_ctor(void)
     _stp_err = _stp_clone_file(stderr);
 }
 
+static int _stp_runtime_contexts_init(void);
+
 int stp_dyninst_session_init(void)
 {
     /* We don't have a chance to indicate errors in the ctor, so do it here. */
     if (_stp_mem_fd < 0) {
        return -errno;
     }
+
+    int rc = _stp_runtime_contexts_init();
+    if (rc != 0)
+       return rc;
+
+    rc = _stp_print_init();
+    if (rc != 0)
+       return rc;
 
-    int rc = systemtap_module_init();
+    rc = systemtap_module_init();
     if (rc == 0) {
        stp_dyninst_master = getpid();
     }
@@ -215,6 +208,7 @@ void stp_dyninst_session_exit(void)
 {
     if (stp_dyninst_master == getpid()) {
        systemtap_module_exit();
+       _stp_print_cleanup();
        stp_dyninst_master = 0;
     }
 }
diff --git a/runtime/dyninst/runtime_context.h b/runtime/dyninst/runtime_context.h
new file mode 100644
index 000000000..beda374a4
--- /dev/null
+++ b/runtime/dyninst/runtime_context.h
@@ -0,0 +1,199 @@
+/* -*- linux-c -*-
+ * Context Runtime Functions
+ * Copyright (C) 2012 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _STAPDYN_RUNTIME_CONTEXT_H_
+#define _STAPDYN_RUNTIME_CONTEXT_H_
+
+#include <pthread.h>
+#include <unistd.h>
+#include <sched.h>
+#include <time.h>
+
+static int _stp_runtime_num_contexts;
+static struct context *_stp_runtime_contexts = NULL;
+static __thread struct context *contexts;
+
+static int _stp_runtime_contexts_init(void)
+{
+	_stp_runtime_num_contexts = sysconf(_SC_NPROCESSORS_ONLN);
+	if (_stp_runtime_num_contexts < 1)
+		_stp_runtime_num_contexts = 1;
+	return 0;
+}
+
+static int _stp_runtime_contexts_alloc(void)
+{
+	size_t size;
+	int i;
+
+	/* Allocate context data. */
+	size = sizeof(struct context) * _stp_runtime_num_contexts;
+	_stp_runtime_contexts = _stp_kzalloc_gfp(size, STP_ALLOC_SLEEP_FLAGS);
+	if (_stp_runtime_contexts == NULL) {
+		_stp_error("context (size %lu) allocation failed",
+			   (unsigned long)size);
+		return -ENOMEM;
+	}
+
+	/* Initialize context data. */
+	for (i = 0; i < _stp_runtime_num_contexts; i++) {
+		int rc;
+
+		_stp_runtime_contexts[i].data_index = i;
+		rc = pthread_mutex_init(&_stp_runtime_contexts[i].lock, NULL);
+		if (rc != 0) {
+			_stp_error("pthread mutex initialization failed");
+			_stp_kfree(_stp_runtime_contexts);
+			_stp_runtime_contexts = NULL;
+			return rc;
+		}
+	}
+	return 0;
+}
+
+static void _stp_runtime_contexts_free(void)
+{
+	int i;
+
+	if (_stp_runtime_contexts != NULL) {
+		/* Teardown context locks. */
+		for (i = 0; i < _stp_runtime_num_contexts; i++) {
+			(void)pthread_mutex_destroy(&_stp_runtime_contexts[i].lock);
+		}
+
+		/* Free context data. */
+		_stp_kfree(_stp_runtime_contexts);
+		_stp_runtime_contexts = NULL;
+	}
+}
+
+static int _stp_runtime_get_data_index(void)
+{
+	/* If this thread has already gotten a context structure,
+	 * return the data index from it. */
+	if (contexts != NULL)
+		return contexts->data_index;
+
+	/* This shouldn't happen. */
+	/* FIXME: assert? */
+	return 0;
+}
+
+static struct context * _stp_runtime_entryfn_get_context(void)
+{
+	int i, index, rc, data_index;
+
+	/* If 'contexts' (which is thread-local storage) is already set
+	 * for this thread, we are re-entrant, so just quit. */
+	if (contexts != NULL)
+		return NULL;
+
+	/* Figure out which cpu we're on, which is our default
+	 * data_index. Make sure the returned data index number is within
+	 * the range of [0.._stp_runtime_num_contexts-1]. Be sure to handle
+	 * a sched_getcpu() failure (it will return -1). */
+	data_index = sched_getcpu() % _stp_runtime_num_contexts;
+	if (unlikely(data_index < 0))
+		data_index = 0;
+
+	/* Try to find a free context structure. */
+	index = data_index;
+	for (i = 0; i < _stp_runtime_num_contexts; i++, index++) {
+		if (index >= _stp_runtime_num_contexts)
+			index = 0;
+		if (pthread_mutex_trylock(&_stp_runtime_contexts[index].lock) == 0) {
+			/* We found a free context structure. Now that it is
+			 * locked, set the TLS pointer and return the context. */
+			contexts = &_stp_runtime_contexts[index];
+			return contexts;
+		}
+	}
+
+	/* If we're here, we couldn't find a free context structure. Wait
+	 * on one. */
+	rc = pthread_mutex_lock(&_stp_runtime_contexts[data_index].lock);
+	if (rc == 0) {
+		contexts = &_stp_runtime_contexts[data_index];
+		return contexts;
+	}
+	return NULL;
+}
+
+static void _stp_runtime_entryfn_put_context(void)
+{
+	if (contexts) {
+		struct context *c = contexts;
+		contexts = NULL;
+		pthread_mutex_unlock(&c->lock);
+	}
+	return;
+}
+
+static struct context *_stp_runtime_get_context(void)
+{
+	/* Note we don't call _stp_runtime_entryfn_get_context()
+	 * here. This function is called after
+	 * _stp_runtime_entryfn_get_context() and has no corresponding
+	 * "put" function. */
+	return contexts;
+}
+
+static void _stp_runtime_context_wait(void)
+{
+	struct timespec hold_start;
+	int hold_index;
+	int holdon;
+
+	(void)clock_gettime(CLOCK_MONOTONIC_RAW, &hold_start);
+	hold_index = -1;
+	do {
+		int i;
+		holdon = 0;
+		struct timespec now, elapsed;
+
+		for (i = 0; i < _stp_runtime_num_contexts; i++) {
+			if (atomic_read (&_stp_runtime_contexts[i].busy)) {
+				holdon = 1;
+
+				/* Just in case things are really stuck, let's print
+				 * some diagnostics. */
+				(void)clock_gettime(CLOCK_MONOTONIC_RAW, &now);
+				_stp_timespec_sub(&now, &hold_start, &elapsed);
+
+				/* If it's been > 1 second since we started and we
+				 * haven't already printed a message for this stuck
+				 * context, print one. */
+				if (elapsed.tv_sec > 0 && (i > hold_index)) {
+					hold_index = i;
+					_stp_error("context[%d] stuck: %s", i,
+						   _stp_runtime_contexts[i].probe_point);
+				}
+			}
+		}
+
+#ifdef STAP_OVERRIDE_STUCK_CONTEXT
+		/* In case things are really really stuck, we are going to
+		 * pretend/assume/hope everything is OK, and let the cleanup
+		 * finish. */
+		(void)clock_gettime(CLOCK_MONOTONIC_RAW, &now);
+		_stp_timespec_sub(&now, &hold_start, &elapsed);
+		if (elapsed.tv_sec > 10) {
+			_stp_warn("overriding stuck context to allow shutdown.");
+			holdon = 0;	/* allow loop to exit */
+		}
+#endif
+
+		if (holdon) {
+			sched_yield();
+		}
+	} while (holdon);
+}
+
+#endif /* _STAPDYN_RUNTIME_CONTEXT_H_ */
diff --git a/runtime/dyninst/stat_runtime.h b/runtime/dyninst/stat_runtime.h
index 409a495eb..6bf9657d0 100644
--- a/runtime/dyninst/stat_runtime.h
+++ b/runtime/dyninst/stat_runtime.h
@@ -15,57 +15,31 @@
 #include <pthread.h>
 #include <sched.h>
 
-/* For dyninst, NEED_STAT_LOCKS is always on since we don't have real
-   per-cpu data.
*/ -#ifndef NEED_STAT_LOCKS -#define NEED_STAT_LOCKS +#ifdef NEED_STAT_LOCKS +#define STAT_LOCK(sd) pthread_mutex_lock(&(sd)->lock) +#define STAT_UNLOCK(sd) pthread_mutex_unlock(&(sd)->lock) +#else +#define STAT_LOCK(sd) do {} while (0) +#define STAT_UNLOCK(sd) do {} while (0) #endif -#define STAT_LOCK(sd) pthread_mutex_lock(&(sd)->lock) -#define STAT_UNLOCK(sd) pthread_mutex_unlock(&(sd)->lock) - - -/* Number of items allocated for a map or stat. Gets initialized to - the number of online cpus. */ -static inline int _stp_stat_get_cpus(void) -{ - static int online_cpus = 0; - if (unlikely(online_cpus == 0)) { - online_cpus = sysconf(_SC_NPROCESSORS_ONLN); - } - return online_cpus; -} - - -static inline int STAT_GET_CPU(void) +static int STAT_GET_CPU(void) { - /* - * Make sure the cpu number is within the range of - * [0.._stp_stat_get_cpus()]. If sched_getcpu() fails, - * it returns -1. - */ - int cpu = sched_getcpu() % _stp_stat_get_cpus(); - if (unlikely(cpu < 0)) - cpu = 0; - return cpu; + return _stp_runtime_get_data_index(); } - #define STAT_PUT_CPU() do {} while (0) -#define _stp_stat_for_each_cpu(cpu) \ - for ((cpu) = 0; (cpu) < _stp_stat_get_cpus(); (cpu)++) - - #define _stp_stat_per_cpu_ptr(stat, cpu) \ ((stat_data *)((void *)((stat)->sd) + ((stat)->size * (cpu)))) static int _stp_stat_initialize_locks(Stat st) { +#ifdef NEED_STAT_LOCKS int i, rc; - _stp_stat_for_each_cpu(i) { + for_each_possible_cpu(i) { stat_data *sdp = _stp_stat_per_cpu_ptr (st, i); if ((rc = pthread_mutex_init(&sdp->lock, NULL)) != 0) { @@ -79,17 +53,22 @@ static int _stp_stat_initialize_locks(Stat st) _stp_error("Couldn't initialize stat mutex: %d\n", rc); } return rc; +#else + return 0; +#endif } static void _stp_stat_destroy_locks(Stat st) { +#ifdef NEED_STAT_LOCKS int i; - _stp_stat_for_each_cpu(i) { + for_each_possible_cpu(i) { stat_data *sdp = _stp_stat_per_cpu_ptr(st, i); (void)pthread_mutex_destroy(&sdp->lock); } (void)pthread_mutex_destroy(&st->agg->lock); +#endif } #endif /* _STAPDYN_STAT_RUNTIME_H_ */ diff --git a/runtime/linux/map_runtime.h b/runtime/linux/map_runtime.h index 6b514b974..bb6995c63 100644 --- a/runtime/linux/map_runtime.h +++ b/runtime/linux/map_runtime.h @@ -44,7 +44,6 @@ static int _stp_map_initialize_lock(MAP m) #define _stp_map_destroy_lock(m) do {} while (0) -#define _stp_map_for_each_cpu(cpu) for_each_possible_cpu((cpu)) #define _stp_map_per_cpu_ptr(m, cpu) per_cpu_ptr((m), (cpu)) #endif /* _LINUX_MAP_RUNTIME_H_ */ diff --git a/runtime/linux/runtime.h b/runtime/linux/runtime.h index e167be6e4..317bb58b5 100644 --- a/runtime/linux/runtime.h +++ b/runtime/linux/runtime.h @@ -187,9 +187,6 @@ void *kallsyms_task_work_cancel; struct unwind_context { }; #endif -static inline void _stp_runtime_entryfn_prologue(void) { } -static inline void _stp_runtime_entryfn_epilogue(void) { } - #ifdef module_param_cb /* kernels >= 2.6.36 */ #define _STP_KERNEL_PARAM_ARG const struct kernel_param #else diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h new file mode 100644 index 000000000..c7c552c9c --- /dev/null +++ b/runtime/linux/runtime_context.h @@ -0,0 +1,123 @@ +/* -*- linux-c -*- + * Context Runtime Functions + * Copyright (C) 2012 Red Hat Inc. + * + * This file is part of systemtap, and is free software. You can + * redistribute it and/or modify it under the terms of the GNU General + * Public License (GPL); either version 2, or (at your option) any + * later version. 
+ */ + +#ifndef _LINUX_RUNTIME_CONTEXT_H_ +#define _LINUX_RUNTIME_CONTEXT_H_ + +static struct context *contexts[NR_CPUS] = { NULL }; + +static int _stp_runtime_contexts_alloc(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + /* Module init, so in user context, safe to use + * "sleeping" allocation. */ + contexts[cpu] = _stp_kzalloc_gfp(sizeof(struct context), + STP_ALLOC_SLEEP_FLAGS); + if (contexts[cpu] == NULL) { + _stp_error ("context (size %lu) allocation failed", + (unsigned long) sizeof (struct context)); + return -ENOMEM; + } + } + return 0; +} + +static void _stp_runtime_contexts_free(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (contexts[cpu] != NULL) { + _stp_kfree(contexts[cpu]); + contexts[cpu] = NULL; + } + } +} + +static struct context * _stp_runtime_entryfn_get_context(void) +{ + return contexts[smp_processor_id()]; +} + +static inline void _stp_runtime_entryfn_put_context(void) +{ + /* Do nothing. */ + return; +} + +static inline struct context * _stp_runtime_get_context(void) +{ + return contexts[smp_processor_id()]; +} + +static void _stp_runtime_context_wait(void) +{ + int holdon; + unsigned long hold_start; + int hold_index; + + hold_start = jiffies; + hold_index = -1; + do { + int i; + + holdon = 0; + for_each_possible_cpu(i) { + if (contexts[i] != NULL + && atomic_read (& contexts[i]->busy)) { + holdon = 1; + + /* Just in case things are really + * stuck, let's print some diagnostics. */ + if (time_after(jiffies, hold_start + HZ) // > 1 second + && (i > hold_index)) { // not already printed + hold_index = i; + printk(KERN_ERR "%s context[%d] stuck: %s\n", THIS_MODULE->name, i, contexts[i]->probe_point); + } + } + } + + /* + * Just in case things are really really stuck, a + * handler probably suffered a fault, and the kernel + * probably killed a task/thread already. We can't be + * quite sure in what state everything is in, however + * auxiliary stuff like kprobes / uprobes / locks have + * already been unregistered. So it's *probably* safe + * to pretend/assume/hope everything is OK, and let + * the cleanup finish. + * + * In the worst case, there may occur a fault, as a + * genuinely running probe handler tries to access + * script globals (about to be freed), or something + * accesses module memory (about to be unloaded). + * This is sometimes stinky, so the alternative + * (default) is to change from a livelock to a + * livelock that sleeps awhile. 
+ */ +#ifdef STAP_OVERRIDE_STUCK_CONTEXT + if (time_after(jiffies, hold_start + HZ*10)) { // > 10 seconds + printk(KERN_ERR "%s overriding stuck context to allow module shutdown.", THIS_MODULE->name); + holdon = 0; // allow loop to exit + } +#else + /* at least stop sucking down the staprun cpu */ + msleep(250); +#endif + + /* NB: we run at least one of these during the + * shutdown sequence: */ + yield(); /* aka schedule() and then some */ + } while (holdon); +} + +#endif /* _LINUX_RUNTIME_CONTEXT_H_ */ diff --git a/runtime/linux/stat_runtime.h b/runtime/linux/stat_runtime.h index d8d58a73d..18e7e41f9 100644 --- a/runtime/linux/stat_runtime.h +++ b/runtime/linux/stat_runtime.h @@ -25,14 +25,13 @@ #define STAT_PUT_CPU() do {} while (0) #endif -#define _stp_stat_for_each_cpu(cpu) for_each_possible_cpu((cpu)) #define _stp_stat_per_cpu_ptr(stat, cpu) per_cpu_ptr((stat)->sd, (cpu)) static int _stp_stat_initialize_locks(Stat st) { #ifdef NEED_STAT_LOCKS int i; - _stp_stat_for_each_cpu(i) { + for_each_possible_cpu(i) { stat_data *sdp = _stp_stat_per_cpu_ptr(st, i); spin_lock_init(&sdp->lock); } diff --git a/runtime/map-stat.c b/runtime/map-stat.c index d779664ab..c79cba10a 100644 --- a/runtime/map-stat.c +++ b/runtime/map-stat.c @@ -74,7 +74,7 @@ _stp_pmap_new_hstat_linear (unsigned max_entries, int wrap, int ksize, int i; MAP m; - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = (MAP)_stp_map_per_cpu_ptr (pmap->map, i); MAP_LOCK(m); m->hist.type = HIST_LINEAR; @@ -106,7 +106,7 @@ _stp_pmap_new_hstat_log (unsigned max_entries, int wrap, int key_size) if (pmap) { int i; MAP m; - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = (MAP)_stp_map_per_cpu_ptr (pmap->map, i); MAP_LOCK(m); m->hist.type = HIST_LOG; diff --git a/runtime/map.c b/runtime/map.c index 9067bad44..d08548e1a 100644 --- a/runtime/map.c +++ b/runtime/map.c @@ -268,7 +268,7 @@ _stp_pmap_new(unsigned max_entries, int wrap, int type, int key_size, pmap->map = (MAP) _stp_alloc_percpu (sizeof(struct map_root)); #else /* Allocate an array of map_root structures. */ - pmap->map = (struct map_root *) _stp_kmalloc_gfp(sizeof(struct map_root) * _stp_stat_get_cpus(), + pmap->map = (struct map_root *) _stp_kmalloc_gfp(sizeof(struct map_root) * _stp_runtime_num_contexts, STP_ALLOC_SLEEP_FLAGS); #endif if (pmap->map == NULL) @@ -276,7 +276,7 @@ _stp_pmap_new(unsigned max_entries, int wrap, int type, int key_size, /* Initialize the memory lists first so if allocations fail * at some point, it is easy to clean up. 
*/ - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = _stp_map_per_cpu_ptr(pmap->map, i); INIT_LIST_HEAD(&m->pool); INIT_LIST_HEAD(&m->head); @@ -285,7 +285,7 @@ _stp_pmap_new(unsigned max_entries, int wrap, int type, int key_size, INIT_LIST_HEAD(&pmap->agg.pool); INIT_LIST_HEAD(&pmap->agg.head); - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = _stp_map_per_cpu_ptr(pmap->map, i); if (_stp_map_init(m, max_entries, wrap, type, key_size, data_size, i)) { @@ -300,7 +300,7 @@ _stp_pmap_new(unsigned max_entries, int wrap, int type, int key_size, return pmap; err1: - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = _stp_map_per_cpu_ptr (pmap->map, i); __stp_map_del(m); } @@ -391,7 +391,7 @@ static void _stp_pmap_clear(PMAP pmap) if (pmap == NULL) return; - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { MAP m = _stp_map_per_cpu_ptr (pmap->map, i); MAP_LOCK(m); @@ -444,7 +444,7 @@ static void _stp_pmap_del(PMAP pmap) if (pmap == NULL) return; - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { MAP m = _stp_map_per_cpu_ptr (pmap->map, i); __stp_map_del(m); } @@ -779,7 +779,7 @@ static MAP _stp_pmap_agg (PMAP pmap) /* every time we aggregate. which would be best? */ _stp_map_clear (agg); - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = _stp_map_per_cpu_ptr (pmap->map, i); MAP_LOCK(m); /* walk the hash chains. */ @@ -954,7 +954,7 @@ static int _stp_pmap_size (PMAP pmap) { int i, num = 0; - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { MAP m = _stp_map_per_cpu_ptr (pmap->map, i); MAP_LOCK(m); num += m->num; diff --git a/runtime/map.h b/runtime/map.h index 20a48db49..2e06f4342 100644 --- a/runtime/map.h +++ b/runtime/map.h @@ -111,12 +111,12 @@ struct map_root { int data_offset; -#ifdef __KERNEL__ #ifdef NEED_MAP_LOCKS +#ifdef __KERNEL__ spinlock_t lock; -#endif #else /* !__KERNEL__ */ pthread_mutex_t lock; +#endif #endif /* the hash table for this array, allocated in _stp_map_init() */ diff --git a/runtime/pmap-gen.c b/runtime/pmap-gen.c index be416ea50..4ec840bf3 100644 --- a/runtime/pmap-gen.c +++ b/runtime/pmap-gen.c @@ -661,7 +661,7 @@ static PMAP KEYSYM(_stp_pmap_new) (unsigned max_entries, int wrap) if (pmap) { int i; MAP m; - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = (MAP)_stp_map_per_cpu_ptr (pmap->map, i); MAP_LOCK(m); m->get_key = KEYSYM(pmap_get_key); @@ -720,7 +720,7 @@ KEYSYM(_stp_pmap_new) (unsigned max_entries, int wrap, int htype, ...) if (pmap) { int i; MAP m; - _stp_map_for_each_cpu(i) { + for_each_possible_cpu(i) { m = _stp_map_per_cpu_ptr (pmap->map, i); MAP_LOCK(m); m->get_key = KEYSYM(pmap_get_key); @@ -944,7 +944,7 @@ static VALTYPE KEYSYM(_stp_pmap_get) (PMAP pmap, ALLKEYSD(key)) } /* now total each cpu */ - _stp_map_for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { map = _stp_map_per_cpu_ptr (pmap->map, cpu); #ifdef NEED_MAP_LOCKS if (!MAP_TRYLOCK(map)) diff --git a/runtime/runtime.h b/runtime/runtime.h index 3ad2e2cbc..6dd96d3cb 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -11,6 +11,13 @@ #ifndef _RUNTIME_H_ #define _RUNTIME_H_ +/* Forward Declarations for routines in runtime_context.h. 
*/ +static int _stp_runtime_contexts_alloc(void); +static void _stp_runtime_contexts_free(void); +static int _stp_runtime_get_data_index(void); +static struct context *_stp_runtime_entryfn_get_context(void); +static void _stp_runtime_entryfn_put_context(void); +static struct context *_stp_runtime_get_context(void); #if defined(__KERNEL__) diff --git a/runtime/runtime_context.h b/runtime/runtime_context.h index af64a9bd0..642376f2a 100644 --- a/runtime/runtime_context.h +++ b/runtime/runtime_context.h @@ -13,6 +13,12 @@ #ifndef _RUNTIME_CONTEXT_H_ #define _RUNTIME_CONTEXT_H_ +#if defined(__KERNEL__) +#include "linux/runtime_context.h" +#elif defined(__DYNINST__) +#include "dyninst/runtime_context.h" +#endif + #include "print.c" #include "io.c" // needs to be included after print.c diff --git a/runtime/stat.c b/runtime/stat.c index 8aea81af3..830961a43 100644 --- a/runtime/stat.c +++ b/runtime/stat.c @@ -94,7 +94,7 @@ static Stat _stp_stat_init (int type, ...) /* Allocate an array of stat_data structures. Note that the * memory must be initialized to zero. */ st->size = size; - st->sd = _stp_kzalloc_gfp(size * _stp_stat_get_cpus(), + st->sd = _stp_kzalloc_gfp(size * _stp_runtime_num_contexts, STP_ALLOC_SLEEP_FLAGS); #endif /* !__KERNEL__ */ if (st->sd == NULL) @@ -188,7 +188,7 @@ static stat_data *_stp_stat_get (Stat st, int clear) STAT_LOCK(agg); _stp_stat_clear_data (st, agg); - _stp_stat_for_each_cpu(i) { + for_each_possible_cpu(i) { stat_data *sd = _stp_stat_per_cpu_ptr (st, i); STAT_LOCK(sd); if (sd->count) { @@ -240,7 +240,7 @@ static void _stp_stat_clear (Stat st) { int i; - _stp_stat_for_each_cpu(i) { + for_each_possible_cpu(i) { stat_data *sd = _stp_stat_per_cpu_ptr (st, i); STAT_LOCK(sd); _stp_stat_clear_data (st, sd); diff --git a/runtime/stat.h b/runtime/stat.h index fdd5fdb63..4531bed18 100644 --- a/runtime/stat.h +++ b/runtime/stat.h @@ -29,13 +29,13 @@ struct stat_data { int64_t count; int64_t sum; int64_t min, max; -#ifdef __KERNEL__ #ifdef NEED_STAT_LOCKS +#ifdef __KERNEL__ spinlock_t lock; -#endif #else /* !__KERNEL__ */ pthread_mutex_t lock; #endif /* !__KERNEL__ */ +#endif int64_t histogram[]; }; typedef struct stat_data stat_data; diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c index 18afb041f..fed418e99 100644 --- a/runtime/vsprintf.c +++ b/runtime/vsprintf.c @@ -356,12 +356,8 @@ _stp_vsprint_memory(char * str, char * end, const char * ptr, if (format == 'M') { /* stolen from kernel: trace_seq_putmem_hex() */ static const char _stp_hex_asc[] = "0123456789abcdef"; -#ifdef __KERNEL__ - c = contexts[smp_processor_id()]; -#else - c = &contexts; -#endif /* PR13386: Skip if called with null context */ + c = _stp_runtime_get_context(); if (c) for (i = 0; i < len && str < end; i++) { unsigned char c_tmp = kread((unsigned char *)(ptr)); ptr++; @@ -371,12 +367,8 @@ _stp_vsprint_memory(char * str, char * end, const char * ptr, len = len * 2; /* the actual length */ } else if (format == 'm') { -#ifdef __KERNEL__ - c = contexts[smp_processor_id()]; -#else - c = &contexts; -#endif /* PR13386: Skip if called with null context */ + c = _stp_runtime_get_context(); if (c) for (i = 0; i < len && str <= end; ++i) { *str++ = kread((unsigned char *)(ptr)); ptr++; diff --git a/tapsets.cxx b/tapsets.cxx index bb4aaf02a..e6cb73ef1 100644 --- a/tapsets.cxx +++ b/tapsets.cxx @@ -135,10 +135,23 @@ common_probe_entryfn_prologue (systemtap_session& s, s.op->newline(1) << "goto probe_epilogue;"; s.op->indent(-1); - if (! 
s.runtime_usermode_p()) - s.op->newline() << "c = contexts[smp_processor_id()];"; - else - s.op->newline() << "c = &contexts;"; + s.op->newline() << "c = _stp_runtime_entryfn_get_context();"; + if (s.runtime_usermode_p()) + { + s.op->newline() << "if (!c) {"; + s.op->newline(1) << "#if !INTERRUPTIBLE"; + s.op->newline() << "atomic_inc (& skipped_count);"; + s.op->newline() << "#endif"; + s.op->newline() << "#ifdef STP_TIMING"; + s.op->newline() << "atomic_inc (& skipped_count_reentrant);"; + s.op->newline() << "#ifdef DEBUG_REENTRANCY"; + s.op->newline() << "_stp_warn (\"Skipped %s\\n\", " << probe << "->pp);"; + s.op->newline() << "#endif"; + s.op->newline() << "#endif"; + s.op->newline() << "goto probe_epilogue;"; + s.op->newline(-1) << "}"; + } + s.op->newline() << "if (atomic_inc_return (& c->busy) != 1) {"; s.op->newline(1) << "#if !INTERRUPTIBLE"; s.op->newline() << "atomic_inc (& skipped_count);"; @@ -158,7 +171,6 @@ common_probe_entryfn_prologue (systemtap_session& s, s.op->newline() << "atomic_dec (& c->busy);"; s.op->newline() << "goto probe_epilogue;"; s.op->newline(-1) << "}"; - s.op->newline() << "_stp_runtime_entryfn_prologue();"; s.op->newline(); s.op->newline() << "c->last_stmt = 0;"; s.op->newline() << "c->last_error = 0;"; @@ -303,12 +315,12 @@ common_probe_entryfn_epilogue (systemtap_session& s, s.op->newline(-1) << "}"; - s.op->newline() << "_stp_runtime_entryfn_epilogue();"; s.op->newline() << "atomic_dec (&c->busy);"; s.op->newline(-1) << "probe_epilogue:"; // context is free s.op->indent(1); + s.op->newline() << "_stp_runtime_entryfn_put_context();"; if (! s.suppress_handler_errors) // PR 13306 { // Check for excessive skip counts. diff --git a/translate.cxx b/translate.cxx index 0bf50ede3..3f4628638 100644 --- a/translate.cxx +++ b/translate.cxx @@ -1107,10 +1107,6 @@ c_unparser::emit_common_header () emit_compiled_printf_locals (); o->newline(-1) << "};\n"; - if (!session->runtime_usermode_p()) - o->newline() << "static struct context *contexts[NR_CPUS] = { NULL };\n"; - else - o->newline() << "static __thread struct context contexts;\n"; emit_map_type_instantiations (); @@ -1660,20 +1656,11 @@ c_unparser::emit_module_init () // while to abort right away. Currently running probes are allowed to // terminate. These may set STAP_SESSION_ERROR! - if (!session->runtime_usermode_p()) { - // per-cpu context - o->newline() << "for_each_possible_cpu(cpu) {"; - o->indent(1); - // Module init, so in user context, safe to use "sleeping" allocation. - o->newline() << "contexts[cpu] = _stp_kzalloc_gfp(sizeof(struct context), STP_ALLOC_SLEEP_FLAGS);"; - o->newline() << "if (contexts[cpu] == NULL) {"; - o->indent(1); - o->newline() << "_stp_error (\"context (size %lu) allocation failed\", (unsigned long) sizeof (struct context));"; - o->newline() << "rc = -ENOMEM;"; - o->newline() << "goto out;"; - o->newline(-1) << "}"; - o->newline(-1) << "}"; - } + // Allocate context structures. + o->newline() << "rc = _stp_runtime_contexts_alloc();"; + o->newline() << "if (rc != 0)"; + o->newline(1) << "goto out;"; + o->indent(-1); for (unsigned i=0; iglobals.size(); i++) { @@ -1703,13 +1690,13 @@ c_unparser::emit_module_init () // Print a message to the kernel log about this module. This is // intended to help debug problems with systemtap modules. - - o->newline() << "_stp_print_kernel_info(" - << "\"" << VERSION - << "/" << dwfl_version (NULL) << "\"" - << ", (num_online_cpus() * sizeof(struct context))" - << ", " << session->probes.size() - << ");"; + if (! 
session->runtime_usermode_p()) + o->newline() << "_stp_print_kernel_info(" + << "\"" << VERSION + << "/" << dwfl_version (NULL) << "\"" + << ", (num_online_cpus() * sizeof(struct context))" + << ", " << session->probes.size() + << ");"; // Run all probe registrations. This actually runs begin probes. @@ -1768,16 +1755,7 @@ c_unparser::emit_module_init () o->newline() << "#endif"; // Free up the context memory after an error too - if (!session->runtime_usermode_p()) { - o->newline() << "for_each_possible_cpu(cpu) {"; - o->indent(1); - o->newline() << "if (contexts[cpu] != NULL) {"; - o->indent(1); - o->newline() << "_stp_kfree(contexts[cpu]);"; - o->newline() << "contexts[cpu] = NULL;"; - o->newline(-1) << "}"; - o->newline(-1) << "}"; - } + o->newline() << "_stp_runtime_contexts_free();"; o->newline() << "return rc;"; o->newline(-1) << "}\n"; @@ -1806,13 +1784,6 @@ c_unparser::emit_module_exit () o->newline() << "static void systemtap_module_exit (void) {"; // rc? o->newline(1) << "int i=0, j=0;"; // for derived_probe_group use - if (! session->runtime_usermode_p()) - { - o->newline() << "int holdon;"; - o->newline() << "int cpu;"; - o->newline() << "unsigned long hold_start;"; - o->newline() << "int hold_index;"; - } o->newline() << "(void) i;"; o->newline() << "(void) j;"; // If we aborted startup, then everything has been cleaned up already, and @@ -1849,60 +1820,7 @@ c_unparser::emit_module_exit () // NB: systemtap_module_exit is assumed to be called from ordinary // user context, say during module unload. Among other things, this // means we can sleep a while. - // - // XXX for now, only limit kernel holds, not dyninst. Note that with - // TLS (which the dyninst runtime code uses), there isn't an - // easy/easily-found way to loop over all the current threads to get - // each thread's data value. - if (! session->runtime_usermode_p()) - { - o->newline() << "hold_start = jiffies;"; - o->newline() << "hold_index = -1;"; - o->newline() << "do {"; - o->newline(1) << "int i;"; - o->newline() << "holdon = 0;"; - - o->newline() << "for_each_possible_cpu(i)"; - o->newline(1) << "if (contexts[i] != NULL && " - << "atomic_read (& contexts[i]->busy)) {"; - o->newline(1) << "holdon = 1;"; - - // just in case things are really stuck, let's print some diagnostics - o->newline() << "if (time_after(jiffies, hold_start + HZ) "; // > 1 second - o->line() << "&& (i > hold_index)) {"; // not already printed - o->newline(1) << "hold_index = i;"; - o->newline() << "printk(KERN_ERR \"%s context[%d] stuck: %s\\n\", THIS_MODULE->name, i, contexts[i]->probe_point);"; - o->newline(-1) << "}"; - o->newline(-1) << "}"; - o->indent(-1); - - // Just in case things are really really stuck, a handler - // probably suffered a fault, and the kernel probably killed a - // task/thread already. We can't be quite sure in what state - // everything is in, however auxiliary stuff like kprobes / - // uprobes / locks have already been unregistered. So it's - // *probably* safe to pretend/assume/hope everything is OK, and - // let the cleanup finish. - // - // In the worst case, there may occur a fault, as a genuinely - // running probe handler tries to access script globals (about - // to be freed), or something accesses module memory (about to - // be unloaded). This is sometimes stinky, so the alternative - // (default) is to change from a livelock to a livelock that - // sleeps awhile. 
- o->newline() << "#ifdef STAP_OVERRIDE_STUCK_CONTEXT"; - o->newline() << "if (time_after(jiffies, hold_start + HZ*10)) { "; // > 10 seconds - o->newline(1) << "printk(KERN_ERR \"%s overriding stuck context to allow module shutdown.\", THIS_MODULE->name);"; - o->newline() << "holdon = 0;"; // allow loop to exit - o->newline(-1) << "}"; - o->newline() << "#else"; - o->newline() << "msleep (250);"; // at least stop sucking down the staprun cpu - o->newline() << "#endif"; - - // NB: we run at least one of these during the shutdown sequence: - o->newline () << "yield ();"; // aka schedule() and then some - o->newline(-1) << "} while (holdon);"; - } + o->newline() << "_stp_runtime_context_wait();"; // cargo cult epilogue o->newline() << "atomic_set (&session_state, STAP_SESSION_STOPPED);"; @@ -1922,17 +1840,8 @@ c_unparser::emit_module_exit () o->newline() << getvar (v).fini(); } - if (! session->runtime_usermode_p()) - { - o->newline() << "for_each_possible_cpu(cpu) {"; - o->indent(1); - o->newline() << "if (contexts[cpu] != NULL) {"; - o->indent(1); - o->newline() << "_stp_kfree(contexts[cpu]);"; - o->newline() << "contexts[cpu] = NULL;"; - o->newline(-1) << "}"; - o->newline(-1) << "}"; - } + // We're finished with the contexts. + o->newline() << "_stp_runtime_contexts_free();"; // teardown gettimeofday (if needed) o->newline() << "#ifdef STAP_NEED_GETTIMEOFDAY"; -- 2.43.5
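
For readers studying the context-pool scheme that runtime/dyninst/runtime_context.h
introduces above, the acquisition/release protocol can be exercised outside
systemtap. The following is a minimal standalone sketch, not part of the patch:
struct ctx, ctx_get(), ctx_put(), worker() and NTHREADS are invented names for
illustration, standing in for struct context, _stp_runtime_entryfn_get_context(),
_stp_runtime_entryfn_put_context() and the probe-handler threads. It shows the
same idea: a pool of mutex-guarded slots sized by the online-CPU count, a
thread-local pointer recording which slot a thread holds, a trylock scan that
starts at the current CPU's slot, and a blocking fallback on that home slot
when every slot is busy.

/* build: gcc -O2 -pthread ctx_sketch.c */
#define _GNU_SOURCE		/* for sched_getcpu() */
#include <pthread.h>
#include <sched.h>
#include <stdlib.h>
#include <unistd.h>

#define NTHREADS 4

struct ctx {
	int data_index;			/* like struct context's data_index */
	pthread_mutex_t lock;		/* like struct context's lock */
};

static int nctx;			/* like _stp_runtime_num_contexts */
static struct ctx *ctxs;		/* like _stp_runtime_contexts */
static __thread struct ctx *cur;	/* like the TLS 'contexts' pointer */

/* Acquire a slot: trylock-scan starting at the current cpu's slot; if
 * all are busy, block on the "home" slot. Returns NULL on re-entry,
 * mirroring _stp_runtime_entryfn_get_context(). */
static struct ctx *ctx_get(void)
{
	int i, idx, start;

	if (cur != NULL)		/* re-entrant call: refuse */
		return NULL;

	start = sched_getcpu() % nctx;
	if (start < 0)			/* sched_getcpu() failed */
		start = 0;

	for (i = 0, idx = start; i < nctx; i++, idx = (idx + 1) % nctx) {
		if (pthread_mutex_trylock(&ctxs[idx].lock) == 0) {
			cur = &ctxs[idx];
			return cur;
		}
	}
	if (pthread_mutex_lock(&ctxs[start].lock) == 0) {
		cur = &ctxs[start];
		return cur;
	}
	return NULL;
}

/* Release: clear the TLS pointer before unlocking, as the patch does. */
static void ctx_put(void)
{
	if (cur) {
		struct ctx *c = cur;
		cur = NULL;
		pthread_mutex_unlock(&c->lock);
	}
}

static void *worker(void *arg)
{
	int n;

	(void)arg;
	for (n = 0; n < 100000; n++) {
		struct ctx *c = ctx_get();
		if (c) {
			/* A probe handler would run here, using
			 * c->data_index to address per-context data
			 * (e.g. the _stp_pbuf[] print buffers). */
			ctx_put();
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t t[NTHREADS];
	int i;

	nctx = (int)sysconf(_SC_NPROCESSORS_ONLN);
	if (nctx < 1)
		nctx = 1;
	ctxs = calloc(nctx, sizeof(*ctxs));
	if (ctxs == NULL)
		return 1;
	for (i = 0; i < nctx; i++) {
		ctxs[i].data_index = i;
		pthread_mutex_init(&ctxs[i].lock, NULL);
	}

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(t[i], NULL);

	for (i = 0; i < nctx; i++)
		pthread_mutex_destroy(&ctxs[i].lock);
	free(ctxs);
	return 0;
}

Starting the scan at the current CPU's slot makes an uncontended system behave
much like per-cpu data (each CPU's threads keep reusing the same slot, and thus
the same print buffer), while the trylock scan plus the blocking fallback keeps
threads from deadlocking when more probe handlers run concurrently than there
are slots.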