]>
Commit | Line | Data |
---|---|---|
a846e9cd MH |
1 | /* -*- linux-c -*- |
2 | * | |
3 | * relay.c - staprun relayfs functions | |
4 | * | |
5 | * This file is part of systemtap, and is free software. You can | |
6 | * redistribute it and/or modify it under the terms of the GNU General | |
7 | * Public License (GPL); either version 2, or (at your option) any | |
8 | * later version. | |
9 | * | |
10 | * Copyright (C) 2007 Red Hat Inc. | |
11 | */ | |
12 | ||
13 | #include "staprun.h" | |
14 | ||
a846e9cd MH |
15 | int out_fd[NR_CPUS]; |
16 | static pthread_t reader[NR_CPUS]; | |
17 | static int relay_fd[NR_CPUS]; | |
a846e9cd | 18 | static int bulkmode = 0; |
1e17f6a2 | 19 | static volatile int stop_threads = 0; |
04ae1b09 MH |
20 | static time_t *time_backlog[NR_CPUS]; |
21 | static int backlog_order=0; | |
22 | #define BACKLOG_MASK ((1 << backlog_order) - 1) | |
a846e9cd | 23 | |
cac1d094 MH |
/*
 * ppoll exists in glibc >= 2.4.  For anything older, NEED_PPOLL is defined
 * and the emulation below is compiled in.
 */
#if (__GLIBC__ < 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ < 4))
#define NEED_PPOLL
#endif

#ifdef NEED_PPOLL
/*
 * ppoll - fallback emulation built from poll() plus a temporary signal mask
 *
 * @fds, @nfds: passed straight to poll().
 * @timeout:    NULL means wait forever; otherwise converted to whole
 *              milliseconds (sub-millisecond remainder is truncated).
 * @sigmask:    signal mask installed for the duration of the poll() call.
 *
 * Returns whatever poll() returned, with errno left as poll() set it.
 *
 * Unlike a real ppoll this is not atomic: a signal can arrive between the
 * mask change and poll().  Callers are expected to use a finite timeout.
 */
static int ppoll(struct pollfd *fds, nfds_t nfds,
		 const struct timespec *timeout, const sigset_t *sigmask)
{
	sigset_t origmask;
	int ready;
	int saved_errno;
	int tim = -1;

	if (timeout != NULL)
		tim = timeout->tv_sec * 1000 + timeout->tv_nsec / 1000000;

	/* This runs in reader threads, so change the calling thread's mask. */
	pthread_sigmask(SIG_SETMASK, sigmask, &origmask);
	ready = poll(fds, nfds, tim);
	/* The caller inspects errno (EINTR); do not let the restore clobber it. */
	saved_errno = errno;
	pthread_sigmask(SIG_SETMASK, &origmask, NULL);
	errno = saved_errno;
	return ready;
}
#endif
49 | ||
04ae1b09 | 50 | int init_backlog(int cpu) |
acd56c22 | 51 | { |
04ae1b09 MH |
52 | int order = 0; |
53 | if (!fnum_max) | |
54 | return 0; | |
55 | while (fnum_max >> order) order++; | |
56 | if (fnum_max == 1<<(order-1)) order--; | |
57 | time_backlog[cpu] = (time_t *)calloc(1<<order, sizeof(time_t)); | |
58 | if (time_backlog[cpu] == NULL) { | |
59 | _err("Memory allocation failed\n"); | |
60 | return -1; | |
61 | } | |
62 | backlog_order = order; | |
63 | return 0; | |
64 | } | |
65 | ||
66 | void write_backlog(int cpu, int fnum, time_t t) | |
67 | { | |
68 | time_backlog[cpu][fnum & BACKLOG_MASK] = t; | |
69 | } | |
70 | ||
71 | time_t read_backlog(int cpu, int fnum) | |
72 | { | |
73 | return time_backlog[cpu][fnum & BACKLOG_MASK]; | |
74 | } | |
75 | ||
76 | int make_outfile_name(char *buf, int max, int fnum, int cpu, time_t t) | |
77 | { | |
78 | int len; | |
79 | len = stap_strfloctime(buf, max, outfile_name, t); | |
80 | if (len < 0) { | |
81 | err("Invalid FILE name format\n"); | |
82 | return -1; | |
83 | } | |
acd56c22 MH |
84 | if (bulkmode) { |
85 | /* special case: for testing we sometimes want to write to /dev/null */ | |
86 | if (strcmp(outfile_name, "/dev/null") == 0) { | |
87 | strcpy(buf, "/dev/null"); | |
88 | } else { | |
04ae1b09 MH |
89 | if (snprintf_chk(&buf[len], PATH_MAX - len, |
90 | "_cpu%d.%d", cpu, fnum)) | |
acd56c22 MH |
91 | return -1; |
92 | } | |
93 | } else { | |
94 | /* stream mode */ | |
04ae1b09 | 95 | if (snprintf_chk(&buf[len], PATH_MAX - len, ".%d", fnum)) |
acd56c22 MH |
96 | return -1; |
97 | } | |
98 | return 0; | |
99 | } | |
100 | ||
101 | static int open_outfile(int fnum, int cpu, int remove_file) | |
102 | { | |
103 | char buf[PATH_MAX]; | |
04ae1b09 | 104 | time_t t; |
acd56c22 MH |
105 | if (!outfile_name) { |
106 | _err("-S is set without -o. Please file a bug report.\n"); | |
107 | return -1; | |
108 | } | |
109 | ||
04ae1b09 MH |
110 | time(&t); |
111 | if (fnum_max) { | |
112 | if (remove_file) { | |
113 | /* remove oldest file */ | |
114 | if (make_outfile_name(buf, PATH_MAX, fnum - fnum_max, | |
115 | cpu, read_backlog(cpu, fnum - fnum_max)) < 0) | |
116 | return -1; | |
117 | remove(buf); /* don't care */ | |
118 | } | |
119 | write_backlog(cpu, fnum, t); | |
acd56c22 MH |
120 | } |
121 | ||
04ae1b09 | 122 | if (make_outfile_name(buf, PATH_MAX, fnum, cpu, t) < 0) |
acd56c22 MH |
123 | return -1; |
124 | out_fd[cpu] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666); | |
125 | if (out_fd[cpu] < 0) { | |
126 | perr("Couldn't open output file %s", buf); | |
127 | return -1; | |
128 | } | |
129 | if (set_clexec(out_fd[cpu]) < 0) | |
130 | return -1; | |
131 | return 0; | |
132 | } | |
133 | ||
a846e9cd MH |
/**
 *	empty_handler - signal handler that intentionally does nothing
 *
 *	reader_thread() installs it for SIGUSR2, so that the signal only
 *	interrupts a blocking call instead of taking its default action.
 */
static void empty_handler(int __attribute__((unused)) sig)
{
	/* do nothing */
}
a846e9cd MH |
138 | |
139 | static void *reader_thread(void *data) | |
140 | { | |
141 | char buf[131072]; | |
142 | int rc, cpu = (int)(long)data; | |
143 | struct pollfd pollfd; | |
62289f0a | 144 | struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim; |
83c5b5fe | 145 | sigset_t sigs; |
1e17f6a2 | 146 | struct sigaction sa; |
acd56c22 MH |
147 | off_t wsize = 0; |
148 | int fnum = 0; | |
149 | int remove_file = 0; | |
83c5b5fe MH |
150 | |
151 | sigemptyset(&sigs); | |
152 | sigaddset(&sigs,SIGUSR2); | |
1e17f6a2 MH |
153 | pthread_sigmask(SIG_BLOCK, &sigs, NULL); |
154 | ||
155 | sigfillset(&sigs); | |
156 | sigdelset(&sigs,SIGUSR2); | |
157 | ||
158 | sa.sa_handler = empty_handler; | |
159 | sa.sa_flags = 0; | |
160 | sigemptyset(&sa.sa_mask); | |
161 | sigaction(SIGUSR2, &sa, NULL); | |
a846e9cd MH |
162 | |
163 | if (bulkmode) { | |
164 | cpu_set_t cpu_mask; | |
165 | CPU_ZERO(&cpu_mask); | |
166 | CPU_SET(cpu, &cpu_mask); | |
5eddf13b DS |
167 | if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 ) |
168 | _perr("sched_setaffinity"); | |
1e17f6a2 MH |
169 | #ifdef NEED_PPOLL |
170 | /* Without a real ppoll, there is a small race condition that could */ | |
171 | /* block ppoll(). So use a timeout to prevent that. */ | |
172 | timeout->tv_sec = 10; | |
173 | timeout->tv_nsec = 0; | |
174 | #else | |
83c5b5fe | 175 | timeout = NULL; |
1e17f6a2 | 176 | #endif |
a846e9cd MH |
177 | } |
178 | ||
179 | pollfd.fd = relay_fd[cpu]; | |
180 | pollfd.events = POLLIN; | |
181 | ||
182 | do { | |
d424e518 | 183 | rc = ppoll(&pollfd, 1, timeout, &sigs); |
a846e9cd | 184 | if (rc < 0) { |
1e17f6a2 | 185 | dbug(3, "cpu=%d poll=%d errno=%d\n", cpu, rc, errno); |
a846e9cd | 186 | if (errno != EINTR) { |
5eddf13b | 187 | _perr("poll error"); |
1bcb8a30 | 188 | goto error_out; |
a846e9cd | 189 | } |
a846e9cd | 190 | } |
83c5b5fe | 191 | while ((rc = read(relay_fd[cpu], buf, sizeof(buf))) > 0) { |
acd56c22 MH |
192 | wsize += rc; |
193 | /* Switching file */ | |
194 | if (fsize_max && wsize > fsize_max) { | |
195 | close(out_fd[cpu]); | |
196 | fnum++; | |
197 | if (fnum_max && fnum == fnum_max) | |
198 | remove_file = 1; | |
199 | if (open_outfile(fnum, cpu, remove_file) < 0) { | |
200 | perr("Couldn't open file for cpu %d, exiting.", cpu); | |
1bcb8a30 | 201 | goto error_out; |
acd56c22 | 202 | } |
5aa1f218 | 203 | wsize = rc; |
acd56c22 | 204 | } |
83c5b5fe | 205 | if (write(out_fd[cpu], buf, rc) != rc) { |
1bcb8a30 JS |
206 | if (errno != EPIPE) |
207 | perr("Couldn't write to output %d for cpu %d, exiting.", out_fd[cpu], cpu); | |
208 | goto error_out; | |
83c5b5fe | 209 | } |
a846e9cd | 210 | } |
a846e9cd | 211 | } while (!stop_threads); |
1bcb8a30 JS |
212 | dbug(3, "exiting thread for cpu %d\n", cpu); |
213 | return(NULL); | |
214 | ||
215 | error_out: | |
216 | /* Signal the main thread that we need to quit */ | |
217 | kill(getpid(), SIGTERM); | |
218 | dbug(2, "exiting thread for cpu %d after error\n", cpu); | |
1e17f6a2 | 219 | return(NULL); |
a846e9cd MH |
220 | } |
221 | ||
222 | /** | |
223 | * init_relayfs - create files and threads for relayfs processing | |
224 | * | |
225 | * Returns 0 if successful, negative otherwise | |
226 | */ | |
5d65678d | 227 | int init_relayfs(void) |
a846e9cd | 228 | { |
04ae1b09 | 229 | int i, len; |
a846e9cd | 230 | struct statfs st; |
b7133b5f | 231 | char rqbuf[128]; |
61c97c45 | 232 | char buf[PATH_MAX], relay_filebase[PATH_MAX]; |
a846e9cd | 233 | |
5eddf13b | 234 | dbug(2, "initializing relayfs\n"); |
a846e9cd MH |
235 | |
236 | reader[0] = (pthread_t)0; | |
237 | relay_fd[0] = 0; | |
238 | out_fd[0] = 0; | |
239 | ||
5eddf13b DS |
240 | if (statfs("/sys/kernel/debug", &st) == 0 |
241 | && (int) st.f_type == (int) DEBUGFS_MAGIC) { | |
242 | if (sprintf_chk(relay_filebase, | |
243 | "/sys/kernel/debug/systemtap/%s", | |
244 | modname)) | |
245 | return -1; | |
246 | } | |
a846e9cd | 247 | else { |
5eddf13b | 248 | err("Cannot find relayfs or debugfs mount point.\n"); |
a846e9cd MH |
249 | return -1; |
250 | } | |
251 | ||
b7133b5f | 252 | if (send_request(STP_BULK, rqbuf, sizeof(rqbuf)) > 0) |
3f569620 | 253 | bulkmode = 1; |
5d65678d MH |
254 | |
255 | for (i = 0; i < NR_CPUS; i++) { | |
5eddf13b DS |
256 | if (sprintf_chk(buf, "%s/trace%d", relay_filebase, i)) |
257 | return -1; | |
83c5b5fe | 258 | dbug(2, "attempting to open %s\n", buf); |
5d65678d | 259 | relay_fd[i] = open(buf, O_RDONLY | O_NONBLOCK); |
577e7ed1 | 260 | if (relay_fd[i] < 0 || set_clexec(relay_fd[i]) < 0) |
5d65678d MH |
261 | break; |
262 | } | |
263 | ncpus = i; | |
3f569620 | 264 | dbug(2, "ncpus=%d, bulkmode = %d\n", ncpus, bulkmode); |
5d65678d MH |
265 | |
266 | if (ncpus == 0) { | |
5eddf13b | 267 | _err("couldn't open %s.\n", buf); |
5d65678d MH |
268 | return -1; |
269 | } | |
3f569620 | 270 | if (ncpus > 1 && bulkmode == 0) { |
5eddf13b DS |
271 | _err("ncpus=%d, bulkmode = %d\n", ncpus, bulkmode); |
272 | _err("This is inconsistent! Please file a bug report. Exiting now.\n"); | |
3f569620 MH |
273 | return -1; |
274 | } | |
5d65678d | 275 | |
acd56c22 MH |
276 | if (fsize_max) { |
277 | /* switch file mode */ | |
04ae1b09 MH |
278 | for (i = 0; i < ncpus; i++) { |
279 | if (init_backlog(i) < 0) | |
acd56c22 | 280 | return -1; |
04ae1b09 MH |
281 | if (open_outfile(0, i, 0) < 0) |
282 | return -1; | |
283 | } | |
acd56c22 | 284 | } else if (bulkmode) { |
a846e9cd | 285 | for (i = 0; i < ncpus; i++) { |
a846e9cd MH |
286 | if (outfile_name) { |
287 | /* special case: for testing we sometimes want to write to /dev/null */ | |
5eddf13b DS |
288 | if (strcmp(outfile_name, "/dev/null") == 0) { |
289 | strcpy(buf, "/dev/null"); | |
290 | } else { | |
04ae1b09 MH |
291 | len = stap_strfloctime(buf, PATH_MAX, |
292 | outfile_name, time(NULL)); | |
293 | if (len < 0) { | |
294 | err("Invalid FILE name format\n"); | |
295 | return -1; | |
296 | } | |
297 | if (snprintf_chk(&buf[len], | |
298 | PATH_MAX - len, "_%d", i)) | |
5eddf13b DS |
299 | return -1; |
300 | } | |
301 | } else { | |
302 | if (sprintf_chk(buf, "stpd_cpu%d", i)) | |
303 | return -1; | |
304 | } | |
5d65678d | 305 | |
a846e9cd | 306 | out_fd[i] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666); |
a846e9cd | 307 | if (out_fd[i] < 0) { |
5eddf13b | 308 | perr("Couldn't open output file %s", buf); |
a846e9cd MH |
309 | return -1; |
310 | } | |
577e7ed1 MH |
311 | if (set_clexec(out_fd[i]) < 0) |
312 | return -1; | |
a846e9cd | 313 | } |
a846e9cd MH |
314 | } else { |
315 | /* stream mode */ | |
a846e9cd | 316 | if (outfile_name) { |
04ae1b09 MH |
317 | len = stap_strfloctime(buf, PATH_MAX, |
318 | outfile_name, time(NULL)); | |
319 | if (len < 0) { | |
320 | err("Invalid FILE name format\n"); | |
321 | return -1; | |
322 | } | |
323 | out_fd[0] = open (buf, O_CREAT|O_TRUNC|O_WRONLY, 0666); | |
a846e9cd | 324 | if (out_fd[0] < 0) { |
04ae1b09 | 325 | perr("Couldn't open output file %s", buf); |
a846e9cd MH |
326 | return -1; |
327 | } | |
577e7ed1 MH |
328 | if (set_clexec(out_fd[i]) < 0) |
329 | return -1; | |
a846e9cd MH |
330 | } else |
331 | out_fd[0] = STDOUT_FILENO; | |
5d65678d | 332 | |
a846e9cd | 333 | } |
0dade809 MH |
334 | if (!load_only) { |
335 | dbug(2, "starting threads\n"); | |
336 | for (i = 0; i < ncpus; i++) { | |
337 | if (pthread_create(&reader[i], NULL, reader_thread, | |
338 | (void *)(long)i) < 0) { | |
339 | _perr("failed to create thread"); | |
340 | return -1; | |
341 | } | |
a846e9cd | 342 | } |
0dade809 MH |
343 | } |
344 | ||
a846e9cd MH |
345 | return 0; |
346 | } | |
347 | ||
348 | void close_relayfs(void) | |
349 | { | |
350 | int i; | |
a846e9cd | 351 | stop_threads = 1; |
83c5b5fe MH |
352 | dbug(2, "closing\n"); |
353 | for (i = 0; i < ncpus; i++) { | |
354 | if (reader[i]) | |
355 | pthread_kill(reader[i], SIGUSR2); | |
356 | else | |
357 | break; | |
358 | } | |
a846e9cd | 359 | for (i = 0; i < ncpus; i++) { |
1e17f6a2 MH |
360 | if (reader[i]) |
361 | pthread_join(reader[i], NULL); | |
a846e9cd MH |
362 | else |
363 | break; | |
364 | } | |
a846e9cd MH |
365 | for (i = 0; i < ncpus; i++) { |
366 | if (relay_fd[i] >= 0) | |
367 | close(relay_fd[i]); | |
368 | else | |
369 | break; | |
370 | } | |
83c5b5fe | 371 | dbug(2, "done\n"); |
a846e9cd MH |
372 | } |
373 |