From 83c5b5febc674ae0810e8dfde55af1d91a2b0b80 Mon Sep 17 00:00:00 2001 From: hunt Date: Tue, 8 May 2007 20:38:10 +0000 Subject: [PATCH] 2007-05-08 Martin Hunt Signal handler cleanup. * mainloop.c (fatal_handler): New. Cleanly handle unexpected fatal signals. (setup_main_signals): New. Set signals once mainloop is entered. (setup_signals): New. Block certain signals during initialization. Set handler for fatal signals. * relay.c (reader_thread): Use ppoll(). Terminate on SIGUSR2 after reading any remaining data. (close_relayfs): Remove sleep hack. Send SIGUSR2 to all threads. Runtime debug messages. * staprun.h: Change dbug() to accept a debuglevel and enable it. * *.c: Modify dbug() calls. --- runtime/staprun/ChangeLog | 18 +++++ runtime/staprun/ctl.c | 4 +- runtime/staprun/mainloop.c | 129 +++++++++++++++++++++++++----------- runtime/staprun/relay.c | 63 +++++++++--------- runtime/staprun/relay_old.c | 8 +-- runtime/staprun/staprun.c | 8 ++- runtime/staprun/staprun.h | 6 +- 7 files changed, 154 insertions(+), 82 deletions(-) diff --git a/runtime/staprun/ChangeLog b/runtime/staprun/ChangeLog index 12bba523c..3d1227599 100644 --- a/runtime/staprun/ChangeLog +++ b/runtime/staprun/ChangeLog @@ -1,3 +1,21 @@ +2007-05-08 Martin Hunt + + Signal handler cleanup. + * mainloop.c (fatal_handler): New. Cleanly handle + unexpected fatal signals. + (setup_main_signals): New. Set signals once mainloop + is entered. + (setup_signals): New. Block certain signals during initialization. + Set handler for fatal signals. + * relay.c (reader_thread): Use ppoll(). Terminate on + SIGUSR2 after reading any remaining data. + (close_relayfs): Remove sleep hack. Send SIGUSR2 to all threads. + + Runtime debug messages. + * staprun.h: Change dbug() to accept a debuglevel and + enable it. + * *.c: Modify dbug() calls. + 2007-05-07 Martin Hunt Patch from David Smith * mainloop.c (stp_main_loop): Properly handle write() diff --git a/runtime/staprun/ctl.c b/runtime/staprun/ctl.c index 95337f592..2996174bd 100644 --- a/runtime/staprun/ctl.c +++ b/runtime/staprun/ctl.c @@ -40,7 +40,7 @@ static void read_buffer_info(void) if (ret != 2) fprintf (stderr, "ERROR: couldn't read bufsize.\n"); - dbug("n_subbufs= %u, size=%u\n", n_subbufs, subbuf_size); + dbug(2, "n_subbufs= %u, size=%u\n", n_subbufs, subbuf_size); close(fd); return; } @@ -56,7 +56,7 @@ int init_ctl_channel(void) else sprintf (buf, "/proc/systemtap/%s/cmd", modname); - dbug("Opening %s\n", buf); + dbug(2, "Opening %s\n", buf); control_channel = open(buf, O_RDWR); if (control_channel < 0) { if (attach_mod) diff --git a/runtime/staprun/mainloop.c b/runtime/staprun/mainloop.c index 6a9b227da..02a9447ae 100644 --- a/runtime/staprun/mainloop.c +++ b/runtime/staprun/mainloop.c @@ -17,6 +17,77 @@ int control_channel = 0; int ncpus; int use_old_transport = 0; +#define ERR_MSG "\nUNEXPECTED FATAL ERROR in staprun. Please file a bug report.\n" +void fatal_handler (int signum) +{ + char *str = strsignal(signum); + (void)write (STDERR_FILENO, ERR_MSG, sizeof(ERR_MSG)); + (void)write (STDERR_FILENO, str, strlen(str)); + (void)write (STDERR_FILENO, "\n", 1); + _exit(-1); + +} + +static void sigproc(int signum) +{ + dbug(2, "sigproc %d (%s)\n", signum, strsignal(signum)); + + if (signum == SIGCHLD) { + pid_t pid = waitpid(-1, NULL, WNOHANG); + if (pid != target_pid) + return; + send_request(STP_EXIT, NULL, 0); + } else if (signum == SIGQUIT) + cleanup_and_exit(2); + + else if (signum == SIGINT || signum == SIGHUP || signum == SIGTERM) + send_request(STP_EXIT, NULL, 0); +} + +static void setup_main_signals(void) +{ + struct sigaction a; + memset(&a, 0, sizeof(a)); + sigfillset(&a.sa_mask); + a.sa_handler = sigproc; + sigaction(SIGINT, &a, NULL); + sigaction(SIGTERM, &a, NULL); + sigaction(SIGHUP, &a, NULL); + sigaction(SIGCHLD, &a, NULL); + sigaction(SIGQUIT, &a, NULL); +} + +void setup_signals(void) +{ + sigset_t s; + struct sigaction a; + + /* blocking all signals while we set things up */ + sigfillset(&s); + pthread_sigmask(SIG_SETMASK, &s, NULL); + + /* set some of them to be ignored */ + memset(&a, 0, sizeof(a)); + sigfillset(&a.sa_mask); + a.sa_handler = SIG_IGN; + sigaction(SIGPIPE, &a, NULL); + sigaction(SIGUSR2, &a, NULL); + + /* for serious errors, handle them in fatal_handler */ + a.sa_handler = fatal_handler; + sigaction(SIGBUS, &a, NULL); + sigaction(SIGFPE, &a, NULL); + sigaction(SIGILL, &a, NULL); + sigaction(SIGSEGV, &a, NULL); + sigaction(SIGXCPU, &a, NULL); + sigaction(SIGXFSZ, &a, NULL); + + /* unblock all signals */ + sigemptyset(&s); + pthread_sigmask(SIG_SETMASK, &s, NULL); +} + + /** * send_request - send request to kernel over control channel * @type: the relay-app command id @@ -53,9 +124,9 @@ void start_cmd(void) sigemptyset(&usrset); sigaddset(&usrset, SIGUSR1); - sigprocmask(SIG_BLOCK, &usrset, NULL); + pthread_sigmask(SIG_BLOCK, &usrset, NULL); - dbug ("execing target_cmd %s\n", target_cmd); + dbug (1, "execing target_cmd %s\n", target_cmd); if ((pid = fork()) < 0) { perror ("fork"); exit(-1); @@ -89,7 +160,7 @@ void system_cmd(char *cmd) { pid_t pid; - dbug ("system %s\n", cmd); + dbug (2, "system %s\n", cmd); if ((pid = fork()) < 0) { perror ("fork"); } else if (pid == 0) { @@ -117,9 +188,8 @@ static int run_stp_check (void) { int ret ; /* run the _stp_check script */ - dbug("executing %s\n", stp_check); + dbug(2, "executing %s\n", stp_check); ret = system(stp_check); - dbug("DONE\n"); return ret; } @@ -136,7 +206,7 @@ int init_staprun(void) int pid; if (system(VERSION_CMD)) { - dbug("Using OLD TRANSPORT\n"); + dbug(1, "Using OLD TRANSPORT\n"); use_old_transport = 1; } @@ -211,16 +281,11 @@ void cleanup_and_exit (int closed) pid_t err; static int exiting = 0; - signal(SIGINT, SIG_IGN); - signal(SIGTERM, SIG_IGN); - signal(SIGHUP, SIG_IGN); - signal(SIGQUIT, SIG_IGN); - if (exiting) return; exiting = 1; - dbug("CLEANUP AND EXIT closed=%d\n", closed); + dbug(1, "CLEANUP AND EXIT closed=%d\n", closed); /* what about child processes? we will wait for them here. */ err = waitpid(-1, NULL, WNOHANG); @@ -233,11 +298,11 @@ void cleanup_and_exit (int closed) else close_relayfs(); - dbug("closing control channel\n"); + dbug(1, "closing control channel\n"); close_ctl_channel(); if (closed == 0) { - dbug("removing module\n"); + dbug(1, "removing module\n"); snprintf(tmpbuf, sizeof(tmpbuf), "/sbin/rmmod -w %s", modname); if (system(tmpbuf)) { fprintf(stderr, "ERROR: couldn't rmmod probe module %s.\n", modname); @@ -251,18 +316,6 @@ void cleanup_and_exit (int closed) exit(0); } -static void sigproc(int signum) -{ - dbug("sigproc %d\n", signum); - if (signum == SIGCHLD) { - pid_t pid = waitpid(-1, NULL, WNOHANG); - if (pid != target_pid) - return; - } else if (signum == SIGQUIT) - cleanup_and_exit(2); - - send_request(STP_EXIT, NULL, 0); -} /** * stp_main_loop - loop forever reading data @@ -277,20 +330,17 @@ int stp_main_loop(void) FILE *ofp = stdout; setvbuf(ofp, (char *)NULL, _IOLBF, 0); + setup_main_signals(); - signal(SIGINT, sigproc); - signal(SIGTERM, sigproc); - signal(SIGHUP, sigproc); - signal(SIGCHLD, sigproc); - signal(SIGQUIT, sigproc); - - dbug("in main loop\n"); + dbug(2, "in main loop\n"); while (1) { /* handle messages from control channel */ nb = read(control_channel, recvbuf, sizeof(recvbuf)); if (nb <= 0) { - perror("recv"); - fprintf(stderr, "WARNING: unexpected EOF. nb=%ld\n", (long)nb); + if (errno != EINTR) { + perror("recv"); + fprintf(stderr, "WARNING: unexpected EOF. nb=%ld\n", (long)nb); + } continue; } @@ -322,14 +372,14 @@ int stp_main_loop(void) { /* module asks us to unload it and exit */ int *closed = (int *)data; - dbug("got STP_EXIT, closed=%d\n", *closed); + dbug(2, "got STP_EXIT, closed=%d\n", *closed); cleanup_and_exit(*closed); break; } case STP_START: { struct _stp_msg_start *t = (struct _stp_msg_start *)data; - dbug("probe_start() returned %d\n", t->res); + dbug(2, "probe_start() returned %d\n", t->res); if (t->res < 0) { if (target_cmd) kill (target_pid, SIGKILL); @@ -341,6 +391,7 @@ int stp_main_loop(void) case STP_SYSTEM: { struct _stp_msg_cmd *c = (struct _stp_msg_cmd *)data; + dbug(2, "STP_SYSTEM: %s\n", c->cmd); system_cmd(c->cmd); break; } @@ -362,14 +413,14 @@ int stp_main_loop(void) } case STP_MODULE: { - dbug("STP_MODULES request received\n"); + dbug(2, "STP_MODULES request received\n"); do_module(data); break; } case STP_SYMBOLS: { struct _stp_msg_symbol *req = (struct _stp_msg_symbol *)data; - dbug("STP_SYMBOLS request received\n"); + dbug(2, "STP_SYMBOLS request received\n"); if (req->endian != 0x1234) { fprintf(stderr,"ERROR: staprun is compiled with different endianess than the kernel!\n"); cleanup_and_exit(0); diff --git a/runtime/staprun/relay.c b/runtime/staprun/relay.c index 82c5ccc46..bb575d49b 100644 --- a/runtime/staprun/relay.c +++ b/runtime/staprun/relay.c @@ -15,8 +15,8 @@ int out_fd[NR_CPUS]; static pthread_t reader[NR_CPUS]; static int relay_fd[NR_CPUS]; -static int stop_threads = 0; static int bulkmode = 0; +static int stop_threads = 0; /** * reader_thread - per-cpu channel buffer reader @@ -27,7 +27,11 @@ static void *reader_thread(void *data) char buf[131072]; int rc, cpu = (int)(long)data; struct pollfd pollfd; - int max_rd = 0; + struct timespec tim = {.tv_sec=0, .tv_nsec=10000}, *timeout = &tim; + sigset_t sigs; + + sigemptyset(&sigs); + sigaddset(&sigs,SIGUSR2); if (bulkmode) { cpu_set_t cpu_mask; @@ -36,42 +40,32 @@ static void *reader_thread(void *data) if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 ) { perror("sched_setaffinity"); } + timeout = NULL; } pollfd.fd = relay_fd[cpu]; pollfd.events = POLLIN; do { - rc = poll(&pollfd, 1, 10); + rc = ppoll(&pollfd, 1, &tim, &sigs); if (rc < 0) { + dbug(3, "poll=%d errno=%d\n", rc, errno); if (errno != EINTR) { fprintf(stderr, "poll error: %s\n",strerror(errno)); pthread_exit(NULL); } - fprintf(stderr, "poll warning: %s\n",strerror(errno)); + stop_threads = 1; } - rc = read(relay_fd[cpu], buf, sizeof(buf)); - if (!rc) { - continue; + while ((rc = read(relay_fd[cpu], buf, sizeof(buf))) > 0) { + if (write(out_fd[cpu], buf, rc) != rc) { + fprintf(stderr, "Couldn't write to output fd %d for cpu %d, exiting: errcode = %d: %s\n", + out_fd[cpu], cpu, errno, strerror(errno)); + pthread_exit(NULL); + } } - if (rc < 0) { - if (errno == EAGAIN) - continue; - fprintf(stderr, "error reading fd %d on cpu %d: %s\n", relay_fd[cpu], cpu, strerror(errno)); - continue; - } - - if (rc > max_rd) - max_rd = rc; - - if (write(out_fd[cpu], buf, rc) != rc) { - fprintf(stderr, "Couldn't write to output fd %d for cpu %d, exiting: errcode = %d: %s\n", - out_fd[cpu], cpu, errno, strerror(errno)); - pthread_exit(NULL); - } - } while (!stop_threads); - pthread_exit((void *)(long)max_rd); + dbug(3, "exiting thread\n"); + pthread_exit(NULL); } /** @@ -85,7 +79,7 @@ int init_relayfs(void) struct statfs st; char buf[128], relay_filebase[128]; - dbug("initializing relayfs\n"); + dbug(1, "initializing relayfs\n"); reader[0] = (pthread_t)0; relay_fd[0] = 0; @@ -101,13 +95,13 @@ int init_relayfs(void) for (i = 0; i < NR_CPUS; i++) { sprintf(buf, "%s/trace%d", relay_filebase, i); - dbug("attempting to open %s\n", buf); + dbug(2, "attempting to open %s\n", buf); relay_fd[i] = open(buf, O_RDONLY | O_NONBLOCK); if (relay_fd[i] < 0) break; } ncpus = i; - dbug("ncpus=%d\n", ncpus); + dbug(2, "ncpus=%d\n", ncpus); if (ncpus == 0) { err("couldn't open %s.\n", buf); @@ -128,7 +122,6 @@ int init_relayfs(void) sprintf(buf, "stpd_cpu%d", i); out_fd[i] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666); - dbug("out_fd[%d] = %d\n", i, out_fd[i]); if (out_fd[i] < 0) { fprintf(stderr, "ERROR: couldn't open output file %s.\n", buf); return -1; @@ -146,7 +139,7 @@ int init_relayfs(void) out_fd[0] = STDOUT_FILENO; } - dbug("starting threads\n"); + dbug(2, "starting threads\n"); for (i = 0; i < ncpus; i++) { if (pthread_create(&reader[i], NULL, reader_thread, (void *)(long)i) < 0) { fprintf(stderr, "failed to create thread\n"); @@ -162,11 +155,17 @@ void close_relayfs(void) { int i; void *res; - dbug("closing\n"); - sleep(1); stop_threads = 1; + dbug(2, "closing\n"); + for (i = 0; i < ncpus; i++) { + if (reader[i]) + pthread_kill(reader[i], SIGUSR2); + else + break; + } + dbug(2, "sent SIGUSR2\n"); for (i = 0; i < ncpus; i++) { if (reader[i]) pthread_join(reader[i], &res); @@ -180,6 +179,6 @@ void close_relayfs(void) else break; } - dbug("closed files\n"); + dbug(2, "done\n"); } diff --git a/runtime/staprun/relay_old.c b/runtime/staprun/relay_old.c index b0d2e43de..11a73cd73 100644 --- a/runtime/staprun/relay_old.c +++ b/runtime/staprun/relay_old.c @@ -57,7 +57,7 @@ void close_oldrelayfs(int detach) if (!bulkmode) return; - dbug("detach=%d, ncpus=%d\n", detach, ncpus); + dbug(2, "detach=%d, ncpus=%d\n", detach, ncpus); if (detach) { for (i = 0; i < ncpus; i++) @@ -91,7 +91,7 @@ static int open_relayfs_files(int cpu, const char *relay_filebase, const char *p } sprintf(tmp, "%s%d", proc_filebase, cpu); - dbug("Opening %s.\n", tmp); + dbug(2, "Opening %s.\n", tmp); proc_fd[cpu] = open(tmp, O_RDWR | O_NONBLOCK); if (proc_fd[cpu] < 0) { fprintf(stderr, "ERROR: couldn't open proc file %s: errcode = %s\n", tmp, strerror(errno)); @@ -221,7 +221,7 @@ int init_oldrelayfs(void) struct statfs st; char relay_filebase[128], proc_filebase[128]; - dbug("initializing relayfs.n_subbufs=%d subbuf_size=%d\n", n_subbufs, subbuf_size); + dbug(2, "initializing relayfs.n_subbufs=%d subbuf_size=%d\n", n_subbufs, subbuf_size); if (n_subbufs) bulkmode = 1; @@ -265,7 +265,7 @@ int init_oldrelayfs(void) } ncpus = i; - dbug("ncpus=%d\n", ncpus); + dbug(2, "ncpus=%d\n", ncpus); for (i = 0; i < ncpus; i++) { /* create a thread for each per-cpu buffer */ diff --git a/runtime/staprun/staprun.c b/runtime/staprun/staprun.c index 4ead70182..f40f676d4 100644 --- a/runtime/staprun/staprun.c +++ b/runtime/staprun/staprun.c @@ -65,7 +65,7 @@ static void usage(char *prog) { fprintf(stderr, "\n%s [-v] [-c cmd ] [-x pid] [-u user]\n" "\t[-A modname]] [-L] [-b bufsize] [-o FILE] kmod-name [kmod-options]\n", prog); - fprintf(stderr, "-v Verbose.\n"); + fprintf(stderr, "-v increase Verbosity.\n"); fprintf(stderr, "-c cmd. Command \'cmd\' will be run and staprun will exit when it does.\n"); fprintf(stderr, " _stp_target will contain the pid for the command.\n"); fprintf(stderr, "-x pid. Sets _stp_target to pid.\n"); @@ -84,10 +84,12 @@ int main(int argc, char **argv) { int c; + setup_signals(); + while ((c = getopt(argc, argv, "ALvb:t:d:c:o:u:x:")) != EOF) { switch (c) { case 'v': - verbose = 1; + verbose++; break; case 'b': { @@ -136,7 +138,7 @@ int main(int argc, char **argv) if (optind < argc) { modpath = argv[optind++]; path_parse_modname(modpath); - dbug("modpath=\"%s\", modname=\"%s\"\n", modpath, modname); + dbug(2, "modpath=\"%s\", modname=\"%s\"\n", modpath, modname); } if (optind < argc) { diff --git a/runtime/staprun/staprun.h b/runtime/staprun/staprun.h index c3599de6a..f8fcfe66c 100644 --- a/runtime/staprun/staprun.h +++ b/runtime/staprun/staprun.h @@ -34,10 +34,11 @@ #include #include +#define DEBUG #ifdef DEBUG -#define dbug(args...) {fprintf(stderr,"%s:%d ",__FUNCTION__, __LINE__); fprintf(stderr,args); } +#define dbug(level, args...) {if (verbose>=level) {fprintf(stderr,"%s:%d ",__FUNCTION__, __LINE__); fprintf(stderr,args);}} #else -#define dbug(args...) ; +#define dbug(level, args...) ; #endif /* DEBUG */ #define err(args...) {fprintf(stderr,"%s:%d ",__FUNCTION__, __LINE__); fprintf(stderr,args); } @@ -69,6 +70,7 @@ int init_relayfs(void); void close_relayfs(void); int init_oldrelayfs(void); void close_oldrelayfs(int); +void setup_signals(void); /* * variables -- 2.43.5