]>
Commit | Line | Data |
---|---|---|
1 | /* -*- linux-c -*- | |
2 | * | |
3 | * staprun.c - SystemTap module loader | |
4 | * | |
5 | * Copyright (C) 2005-2019 Red Hat, Inc. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | * | |
20 | */ | |
21 | ||
22 | #define _XOPEN_SOURCE | |
23 | #define _BSD_SOURCE | |
24 | #define _DEFAULT_SOURCE | |
25 | #include "staprun.h" | |
26 | #include "../privilege.h" | |
27 | #include "../runtime/k_syms.h" | |
28 | #include <string.h> | |
29 | #include <sys/uio.h> | |
30 | #include <glob.h> | |
31 | #include <time.h> | |
32 | #include <unistd.h> | |
33 | #include <sys/prctl.h> | |
34 | #include <sys/utsname.h> | |
35 | ||
36 | /* used in dbug, _err and _perr */ | |
37 | char *__name__ = "staprun"; | |
38 | ||
39 | extern long delete_module(const char *, unsigned int); | |
40 | ||
41 | int send_relocations (); | |
42 | int send_tzinfo (); | |
43 | int send_privilege_credentials (privilege_t user_credentials); | |
44 | int send_remote_id (); | |
45 | ||
46 | static int remove_module(const char *name, int verb); | |
47 | ||
48 | static int stap_module_inserted = -1; | |
49 | ||
50 | static void term_signal_handler(int signum __attribute ((unused))) | |
51 | { | |
52 | if (stap_module_inserted == 0) { | |
53 | remove_module(modname, 1); | |
54 | free(modname); | |
55 | } | |
56 | _exit(1); | |
57 | } | |
58 | ||
59 | void setup_term_signals(void) | |
60 | { | |
61 | sigset_t s; | |
62 | struct sigaction a; | |
63 | ||
64 | /* blocking all signals while we set things up */ | |
65 | sigfillset(&s); | |
66 | sigprocmask(SIG_SETMASK, &s, NULL); | |
67 | ||
68 | /* handle signals */ | |
69 | memset(&a, 0, sizeof(a)); | |
70 | sigfillset(&a.sa_mask); | |
71 | a.sa_handler = term_signal_handler; | |
72 | sigaction(SIGHUP, &a, NULL); | |
73 | sigaction(SIGINT, &a, NULL); | |
74 | sigaction(SIGTERM, &a, NULL); | |
75 | sigaction(SIGQUIT, &a, NULL); | |
76 | ||
77 | /* unblock all signals */ | |
78 | sigemptyset(&s); | |
79 | sigprocmask(SIG_SETMASK, &s, NULL); | |
80 | } | |
81 | ||
82 | static int run_as(int exec_p, uid_t uid, gid_t gid, const char *path, char *const argv[]) | |
83 | { | |
84 | pid_t pid; | |
85 | int rstatus; | |
86 | ||
87 | if (verbose >= 2) { | |
88 | int i = 0; | |
89 | eprintf(exec_p ? "execing: ": "spawning: "); | |
90 | while (argv[i]) { | |
91 | eprintf("%s ", argv[i]); | |
92 | i++; | |
93 | } | |
94 | eprintf("\n"); | |
95 | } | |
96 | ||
97 | if (exec_p) | |
98 | pid = 0; | |
99 | else | |
100 | pid = fork(); | |
101 | ||
102 | if (pid < 0) | |
103 | { | |
104 | _perr("fork"); | |
105 | return -1; | |
106 | } | |
107 | ||
108 | if (pid == 0) /* child process, or exec_p */ | |
109 | { | |
110 | /* Make sure we run as the full user. If we're | |
111 | * switching to a non-root user, this won't allow | |
112 | * that process to switch back to root (since the | |
113 | * original process is setuid). */ | |
114 | if (setresgid (gid, gid, gid) < 0) { | |
115 | _perr("setresgid"); | |
116 | exit(1); | |
117 | } | |
118 | if (setresuid (uid, uid, uid) < 0) { | |
119 | _perr("setresuid"); | |
120 | exit(1); | |
121 | } | |
122 | ||
123 | /* Actually run the command. */ | |
124 | if (execv(path, argv) < 0) | |
125 | perror(path); | |
126 | _exit(1); | |
127 | } | |
128 | ||
129 | if (waitpid(pid, &rstatus, 0) < 0) | |
130 | return -1; | |
131 | ||
132 | if (WIFEXITED(rstatus)) | |
133 | return WEXITSTATUS(rstatus); | |
134 | return -1; | |
135 | } | |
136 | ||
137 | /* | |
138 | * Module to be inserted has one or more user-space probes. Make sure | |
139 | * uprobes is enabled. | |
140 | * If /proc/kallsyms lists a symbol in uprobes (e.g. unregister_uprobe), | |
141 | * we're done. | |
142 | * Else try "modprobe uprobes" to load the uprobes module (if any) | |
143 | * built with the kernel. | |
144 | * If that fails, load the uprobes module built in runtime/uprobes. | |
145 | */ | |
146 | static int enable_uprobes(void) | |
147 | { | |
148 | char *argv[10]; | |
149 | char runtimeko[2048]; | |
150 | int rc; | |
151 | ||
152 | /* Formerly, we did a grep /proc/kallsyms search to see if | |
153 | uprobes was already loaded into the kernel. But this is | |
154 | a race waiting to happen. Just try to load the thing. | |
155 | Quietly accept a -EEXIST error. */ | |
156 | ||
157 | /* NB: don't use /sbin/modprobe, without more env. sanitation. */ | |
158 | ||
159 | /* Try the specified module or the one from the runtime. */ | |
160 | if (uprobes_path) | |
161 | snprintf (runtimeko, sizeof(runtimeko), "%s", uprobes_path); | |
162 | else | |
163 | /* NB: since PR5163, share/runtime/uprobes/uprobes.ko is not built | |
164 | by systemtap. */ | |
165 | snprintf (runtimeko, sizeof(runtimeko), "%s/uprobes/uprobes.ko", | |
166 | (getenv("SYSTEMTAP_RUNTIME") ?: PKGDATADIR "/runtime")); | |
167 | dbug(2, "Inserting uprobes module from %s.\n", runtimeko); | |
168 | /* This module may be signed, so use insert_module to load it. */ | |
169 | argv[0] = NULL; | |
170 | ||
171 | rc = insert_module(runtimeko, NULL, argv, assert_uprobes_module_permissions, NULL); | |
172 | if ((rc == 0) || /* OK */ | |
173 | (rc == -EEXIST)) /* Someone else might have loaded it */ | |
174 | return 0; | |
175 | ||
176 | err("Couldn't insert module '%s': %s\n", runtimeko, moderror(errno)); | |
177 | return 1; /* failure */ | |
178 | } | |
179 | ||
180 | static int insert_stap_module(privilege_t *user_credentials) | |
181 | { | |
182 | char special_options[128]; | |
183 | int rc, fips_mode_fd; | |
184 | char fips_mode = '0'; | |
185 | char *misc = ""; | |
186 | ||
187 | /* Add the _stp_bufsize option. */ | |
188 | if (snprintf_chk(special_options, sizeof (special_options), | |
189 | "_stp_bufsize=%d", (int)buffer_size)) | |
190 | return -1; | |
191 | ||
192 | fips_mode_fd = open("/proc/sys/crypto/fips_enabled", O_RDONLY); | |
193 | if (fips_mode_fd >= 0) { | |
194 | char c; | |
195 | rc = read(fips_mode_fd, &c, 1); | |
196 | if (rc == 1) fips_mode = c; | |
197 | close (fips_mode_fd); | |
198 | } | |
199 | ||
200 | /* In FIPS mode, a kernel may panic if given an improperly-signed module. | |
201 | Right now, we have no way of signing them with the kernel build-time keys, | |
202 | so we punt. See also SecureBoot. */ | |
203 | if ((fips_mode != '0') && !getenv("STAP_FIPS_OVERRIDE")) { | |
204 | errno = EPERM; | |
205 | stap_module_inserted = -1; | |
206 | misc = "in FIPS mode "; | |
207 | } else { | |
208 | stap_module_inserted = insert_module(modpath, special_options, | |
209 | modoptions, | |
210 | assert_stap_module_permissions, | |
211 | user_credentials); | |
212 | } | |
213 | if (stap_module_inserted != 0) | |
214 | err("Couldn't insert module %s'%s': %s\n", misc, modpath, moderror(errno)); | |
215 | return stap_module_inserted; | |
216 | } | |
217 | ||
218 | static void remove_all_modules(void) | |
219 | { | |
220 | char *base; | |
221 | struct statfs st; | |
222 | struct dirent *d; | |
223 | DIR *moddir; | |
224 | ||
225 | /* NB: nothing to do with PR14245 */ | |
226 | if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) | |
227 | base = "/sys/kernel/debug/systemtap"; | |
228 | else | |
229 | base = "/proc/systemtap"; | |
230 | ||
231 | moddir = opendir(base); | |
232 | if (moddir) { | |
233 | while ((d = readdir(moddir))) { | |
234 | if (strcmp(d->d_name, ".") == 0) continue; | |
235 | if (strcmp(d->d_name, "..") == 0) continue; | |
236 | relay_basedir_fd = -1; /* each time! */ | |
237 | if (remove_module(d->d_name, 0) == 0) | |
238 | printf("Module %s removed.\n", d->d_name); | |
239 | } | |
240 | closedir(moddir); | |
241 | } | |
242 | } | |
243 | ||
244 | static int remove_module(const char *name, int verb) | |
245 | { | |
246 | int i, ret; | |
247 | enum { | |
248 | MAX_EINTR_TRIES = 5 | |
249 | }; | |
250 | ||
251 | dbug(2, "%s\n", name); | |
252 | ||
253 | #ifdef PR_SET_NAME | |
254 | /* Make self easier to identify in vmcrash images */ | |
255 | prctl (PR_SET_NAME, "staprun-d"); | |
256 | #endif | |
257 | ||
258 | (void) verb; /* XXX: ignore */ | |
259 | ||
260 | if (strcmp(name, "*") == 0) { | |
261 | remove_all_modules(); | |
262 | return 0; | |
263 | } | |
264 | ||
265 | /* We call init_ctl_channel/close_ctl_channel to check whether | |
266 | the module is a systemtap-built one (having the right files), | |
267 | and that it's already unattached (because otherwise it'd EBUSY | |
268 | the opens, and that it belongs to our uid (because otherwise | |
269 | a faccessat(2) test on the .cmd file will fail). */ | |
270 | ret = init_ctl_channel (name, 0); | |
271 | if (ret < 0) { | |
272 | err("'%s' is not a zombie systemtap module.\n", name); | |
273 | return ret; | |
274 | } | |
275 | close_ctl_channel (); | |
276 | ||
277 | dbug(2, "removing module %s\n", name); | |
278 | PROBE1(staprun, remove__module, name); | |
279 | ||
280 | for (i = 0; i < MAX_EINTR_TRIES; i++) { | |
281 | ret = delete_module (name, O_NONBLOCK); | |
282 | if (ret == 0 || (errno != EINTR && errno != EWOULDBLOCK)) | |
283 | break; | |
284 | usleep(100 * i); | |
285 | } | |
286 | ||
287 | if (ret != 0) { | |
288 | err("Couldn't remove module '%s': %s.\n", name, strerror(errno)); | |
289 | return 1; | |
290 | } | |
291 | ||
292 | dbug(1, "Module %s removed.\n", name); | |
293 | return 0; | |
294 | } | |
295 | ||
296 | ||
/* As per PR13193 & PR1548, some kernels have a buggy
   kprobes-optimization code, which results in BUG/panics in certain
   circumstances.  We turn off kprobes optimization as a conservative
   measure, unless told otherwise by an environment variable
   (STAP_PR13193_OVERRIDE).
*/
void disable_kprobes_optimization()
{
	const char *knob = "/proc/sys/debug/kprobes-optimization";
	struct utsname uts;
	char state;
	int fd, n;

	/* PR13814; disable this facility for new enough kernels, containing
	 * these fix commits: 86b4ce31 46484688 3f33ab1c */
	/* PR15484; whoops, not enough, problem still seen on Debian
	 * 3.8.12 kernel.  Hence the version test is switched off. */
	if (0 && (uname (&uts) == 0) && (strverscmp (uts.release, "3.4") >= 0))
		return;
	/* Disable kprobes optimization due to problems seen on F29 5.0 kernel.
	   PR24416; RCU hang detection with uprobes_onthefly.exp. */
	/* RHBZ1697531 - x86 kprobe optimization causes rcu hang */
	if ((0 && uname (&uts) == 0) && (strverscmp (uts.release, "4.8") >= 0))
		return;

	if (getenv ("STAP_PR13193_OVERRIDE"))
		return;

	/* See the initial state; if the file is missing or unreadable,
	   or optimization is already disabled, we do nothing. */
	fd = open (knob, O_RDONLY);
	if (fd < 0)
		return;
	n = read (fd, &state, sizeof(state));
	(void) close (fd);
	if (n < 1 || state == '0')
		return;

	/* Write a single '0' to switch the optimization off. */
	fd = open (knob, O_WRONLY);
	if (fd < 0)
		return;
	state = '0';
	n = write (fd, &state, sizeof(state));
	(void) close (fd);

	if (n != 1) {
		dbug(1, "Error %d/%d disabling %s.\n", n, errno, knob);
	} else {
		dbug(1, "Disabled %s.\n", knob);
	}
}
345 | ||
346 | ||
/* BZ1552745: /proc/sys/kernel/kptr_restrict makes /sys/module
   ... addresses unreliable on 2018+ kernels.  We tweak this security
   measure (setting it to '1'), unless told otherwise by an
   environment variable (STAP_BZ1552745_OVERRIDE).  We could turn it
   back later, but this would create a race condition between
   concurrent runs of staprun.  The '1' setting is nominally more
   secure than the default '0', except that for
   /sys/module/$MODULE/sections/$SECTION the '0' case produces
   obfuscated 0-based pointers, and '1' produces good ones (to a root
   user).  Strange but true.
*/
void tweak_kptr_restrict()
{
	const char *knob = "/proc/sys/kernel/kptr_restrict";
	struct utsname uts;
	char state;
	int fd, n;

	/* Relevant change appears to have been introduced in v4.15 in
	 * commit ef0010a30935de4e0211.  Older kernels are left alone. */
	if ((uname (&uts) == 0) && (strverscmp (uts.release, "4.15") < 0))
		return;

	if (getenv ("STAP_BZ1552745_OVERRIDE"))
		return;

	/* See the initial state; if the file is missing or unreadable,
	   or the value is already '1', we do nothing. */
	fd = open (knob, O_RDONLY);
	if (fd < 0)
		return;
	n = read (fd, &state, sizeof(state));
	(void) close (fd);
	if (n < 1 || state == '1')
		return;

	/* Write a single '1' as the new setting. */
	fd = open (knob, O_WRONLY);
	if (fd < 0)
		return;
	state = '1';
	n = write (fd, &state, sizeof(state));
	(void) close (fd);

	if (n != 1) {
		dbug(1, "Error %d/%d setting %s.\n", n, errno, knob);
	} else {
		dbug(1, "Set %s.\n", knob);
	}
}
392 | ||
393 | ||
394 | ||
395 | int init_staprun(void) | |
396 | { | |
397 | privilege_t user_credentials = pr_unknown; | |
398 | int rc; | |
399 | dbug(2, "init_staprun\n"); | |
400 | ||
401 | if (mountfs() < 0) | |
402 | return -1; | |
403 | ||
404 | rc = 0; | |
405 | if (delete_mod) | |
406 | exit(remove_module(modname, 1)); | |
407 | if (attach_mod) { | |
408 | /* PR14245: prime the relay_basedir_fd pump. */ | |
409 | rc = init_ctl_channel (modname, 0); | |
410 | if (rc >= 0) | |
411 | close_ctl_channel (); | |
412 | } else /* if (!attach_mod) */ { | |
413 | if (need_uprobes && enable_uprobes() != 0) | |
414 | return -1; | |
415 | ||
416 | disable_kprobes_optimization(); | |
417 | ||
418 | if (insert_stap_module(& user_credentials) < 0) { | |
419 | if(!rename_mod && errno == EEXIST) | |
420 | err("Rerun with staprun option '-R' to rename this module.\n"); | |
421 | return -1; | |
422 | } | |
423 | rc = init_ctl_channel (modname, 0); | |
424 | if (rc >= 0) { | |
425 | /* If we are unable to send privilege credentials then we have an old | |
426 | (pre 1.7) stap module or a non-stap module. In either case, the privilege | |
427 | credentials required for loading the module have already been determined and | |
428 | checked (see check_groups, get_module_required_credentials). | |
429 | */ | |
430 | send_privilege_credentials(user_credentials); | |
431 | rc = send_relocations(); | |
432 | if (rc == 0) { | |
433 | rc = send_tzinfo(); | |
434 | if (rc == 0 && remote_id >= 0) | |
435 | send_remote_id(); | |
436 | } | |
437 | close_ctl_channel (); | |
438 | } | |
439 | if (rc != 0) | |
440 | remove_module(modname, 1); | |
441 | } | |
442 | return rc; | |
443 | } | |
444 | ||
445 | int main(int argc, char **argv) | |
446 | { | |
447 | int rc; | |
448 | ||
449 | /* Force libc to make our stderr messages atomic by enabling line | |
450 | buffering since stderr is unbuffered by default. Without this, libc | |
451 | is at liberty to split a single stderr message into multiple writes | |
452 | to the fd while holding flockfile(stderr). POSIX only guarantees that | |
453 | a single write(2) is atomic; chaining several write(2) calls together | |
454 | won't be atomic, and we don't want libc to do that within a single | |
455 | *fprintf(stderr) call since it'll mangle messages printed across | |
456 | different processes (*not* threads). */ | |
457 | setlinebuf(stderr); | |
458 | ||
459 | /* NB: Don't do the geteuid()!=0 check here, since we want to | |
460 | test command-line error-handling while running non-root. */ | |
461 | /* Get rid of a few standard environment variables (which */ | |
462 | /* might cause us to do unintended things). */ | |
463 | rc = unsetenv("IFS") || unsetenv("CDPATH") || unsetenv("ENV") | |
464 | || unsetenv("BASH_ENV"); | |
465 | if (rc) { | |
466 | _perr("unsetenv failed"); | |
467 | exit(-1); | |
468 | } | |
469 | ||
470 | if (getuid() != geteuid()) { /* setuid? */ | |
471 | rc = unsetenv("SYSTEMTAP_STAPRUN") || | |
472 | unsetenv("SYSTEMTAP_STAPIO") || | |
473 | unsetenv("SYSTEMTAP_RUNTIME"); | |
474 | ||
475 | if (rc) { | |
476 | _perr("unsetenv failed"); | |
477 | exit(-1); | |
478 | } | |
479 | } | |
480 | ||
481 | setup_signals(); | |
482 | setup_term_signals(); | |
483 | ||
484 | parse_args(argc, argv); | |
485 | ||
486 | /* PR14245, For security reasons, preclude "staprun -F fd". | |
487 | The -F option is only for stapio, but the overzealous quest | |
488 | for commonality doesn't let us express that nicer. */ | |
489 | if (relay_basedir_fd >= 0) { | |
490 | err(_("Relay basedir -F option is invalid for staprun\n")); | |
491 | exit(1); | |
492 | } | |
493 | /* NB: later on, some of our own code may set relay_basedir_fd, for | |
494 | passing onto stapio - or for our own reuse. That's OK. */ | |
495 | ||
496 | ||
497 | if (buffer_size) | |
498 | dbug(2, "Using a buffer of %u MB.\n", buffer_size); | |
499 | ||
500 | int mod_optind = optind; | |
501 | if (optind < argc) { | |
502 | parse_modpath(argv[optind++]); | |
503 | dbug(2, "modpath=\"%s\", modname=\"%s\"\n", modpath, modname); | |
504 | } | |
505 | ||
506 | if (optind < argc) { | |
507 | if (attach_mod) { | |
508 | err("Cannot have module options with attach (-A).\n"); | |
509 | usage(argv[0],1); | |
510 | } else { | |
511 | unsigned start_idx = 0; | |
512 | while (optind < argc && start_idx + 1 < MAXMODOPTIONS) | |
513 | modoptions[start_idx++] = argv[optind++]; | |
514 | modoptions[start_idx] = NULL; | |
515 | } | |
516 | } | |
517 | ||
518 | if (modpath == NULL || *modpath == '\0') { | |
519 | err("Need a module name or path to load.\n"); | |
520 | usage(argv[0],1); | |
521 | } | |
522 | ||
523 | if (geteuid() != 0) { | |
524 | err("The effective user ID of staprun must be set to the root user.\n" | |
525 | " Check permissions on staprun and ensure it is a setuid root program.\n"); | |
526 | exit(1); | |
527 | } | |
528 | ||
529 | char verbose_level[33]; | |
530 | sprintf(verbose_level, "%d", verbose); | |
531 | rc = setenv("SYSTEMTAP_VERBOSE", verbose_level, 0); | |
532 | if (rc) { | |
533 | _perr("SYSTEMTAP_VERBOSE setenv failed"); | |
534 | exit(-1); | |
535 | } | |
536 | ||
537 | if (init_staprun()) | |
538 | exit(1); | |
539 | ||
540 | argv[0] = getenv ("SYSTEMTAP_STAPIO") ?: PKGLIBDIR "/stapio"; | |
541 | ||
542 | /* Copy nenamed modname into argv */ | |
543 | if(rename_mod) | |
544 | argv[mod_optind] = modname; | |
545 | ||
546 | /* PR14245: pass -F fd to stapio. Unfortunately, this requires | |
547 | us to extend argv[], with all the C fun that entails. */ | |
548 | #ifdef HAVE_OPENAT | |
549 | if (relay_basedir_fd >= 0) { | |
550 | char ** new_argv = calloc(argc+2, sizeof(char *)); | |
551 | const int new_Foption_size = 10; /* -FNNNNN */ | |
552 | char * new_Foption = malloc(new_Foption_size); | |
553 | int i; | |
554 | ||
555 | if (new_argv && new_Foption) { | |
556 | snprintf (new_Foption, new_Foption_size, "-F%d", relay_basedir_fd); | |
557 | for (i=0; i < argc && argv[i] != NULL; i++) | |
558 | new_argv[i] = argv[i]; | |
559 | new_argv[i++] = new_Foption; /* overwrite the NULL */ | |
560 | new_argv[i++] = NULL; /* ensconce a new NULL */ | |
561 | ||
562 | argv = new_argv; | |
563 | } | |
564 | } | |
565 | #endif | |
566 | ||
567 | /* Run stapio */ | |
568 | if (run_as (1, getuid(), getgid(), argv[0], argv) < 0) { | |
569 | perror(argv[0]); | |
570 | goto err; | |
571 | } | |
572 | ||
573 | free(modname); | |
574 | return 0; | |
575 | ||
576 | err: | |
577 | remove_module(modname, 1); | |
578 | free(modname); | |
579 | return 1; | |
580 | } | |
581 | ||
582 | ||
583 | ||
584 | /* Send a variety of relocation-related data to the kernel: for the | |
585 | kernel proper, just the "_stext" symbol address; for all loaded | |
586 | modules, a variety of symbol base addresses. | |
587 | ||
588 | We do this under protest. The kernel ought expose this data to | |
589 | modules such as ourselves, but instead the upstream community | |
590 | continually shrinks its module-facing interfaces, including this | |
591 | stuff, even when users exist. | |
592 | ||
593 | PR26074: as of kernel 5.7+ / commit 0bd476e6c671 and under further | |
594 | protest, we must also send the address of kallsyms_lookup_name and | |
595 | kallsyms_for_each_symbol. | |
596 | */ | |
597 | ||
598 | ||
599 | int send_a_relocation (const char* module, const char* reloc, unsigned long long address) | |
600 | { | |
601 | struct _stp_msg_relocation msg; | |
602 | int rc; | |
603 | ||
604 | if (strlen(module) >= STP_MODULE_NAME_LEN-1) { | |
605 | dbug (1, "module name too long: %s\n", module); | |
606 | return -EINVAL; | |
607 | } | |
608 | strncpy (msg.module, module, STP_MODULE_NAME_LEN - 1); | |
609 | ||
610 | if (strlen(reloc) >= STP_SYMBOL_NAME_LEN-1) { | |
611 | dbug (1, "reloc name too long: %s\n", reloc); | |
612 | return -EINVAL; | |
613 | } | |
614 | strncpy (msg.reloc, reloc, STP_MODULE_NAME_LEN - 1); | |
615 | ||
616 | msg.address = address; | |
617 | ||
618 | rc = send_request (STP_RELOCATION, & msg, sizeof (msg)); | |
619 | if (rc != 0) | |
620 | perror ("Unable to send relocation"); | |
621 | return rc; | |
622 | } | |
623 | ||
624 | ||
625 | int send_relocation_kernel () | |
626 | { | |
627 | FILE* kallsyms; | |
628 | int rc = 0; | |
629 | ||
630 | errno = 0; | |
631 | kallsyms = fopen ("/proc/kallsyms", "r"); | |
632 | if (kallsyms == NULL) | |
633 | { | |
634 | perror("cannot open /proc/kallsyms"); | |
635 | // ... and the kernel module will almost certainly fail to initialize. | |
636 | return errno; | |
637 | } | |
638 | else | |
639 | { | |
640 | int found_stext = 0; | |
641 | int found_kallsyms_lookup_name = 0; | |
642 | int found_kallsyms_on_each_symbol = 0; | |
643 | int found_module_kallsyms_on_each_symbol = 0; | |
644 | int done_with_kallsyms = 0; | |
645 | char *line = NULL; | |
646 | size_t linesz = 0; | |
647 | while (! feof(kallsyms) && !done_with_kallsyms) | |
648 | { | |
649 | ssize_t linesize = getline (& line, & linesz, kallsyms); | |
650 | if (linesize > 0) | |
651 | { | |
652 | unsigned long long address; | |
653 | int pos = -1; | |
654 | if (sscanf (line, "%llx %*c %n", &address, &pos) != 1 | |
655 | || pos == -1) | |
656 | continue; // no symbols here | |
657 | if (linesize - pos == sizeof KERNEL_RELOC_SYMBOL | |
658 | && !strcmp(line + pos, KERNEL_RELOC_SYMBOL "\n")) | |
659 | { | |
660 | /* NB: even on ppc, we use the _stext relocation name. */ | |
661 | rc = send_a_relocation ("kernel", "_stext", address); | |
662 | if (rc != 0) | |
663 | break; | |
664 | ||
665 | found_stext=1; | |
666 | } | |
667 | else if (linesize - pos == sizeof "kallsyms_lookup_name" | |
668 | && !strcmp(line + pos, "kallsyms_lookup_name" "\n")) | |
669 | { | |
670 | rc = send_a_relocation ("kernel", "kallsyms_lookup_name", address); | |
671 | if (rc != 0) // non fatal, follows perror() | |
672 | dbug(1, "Relocation was kallsyms_lookup_name=%llx\n", address); | |
673 | ||
674 | found_kallsyms_lookup_name = 1; | |
675 | } | |
676 | else if (linesize - pos == sizeof "kallsyms_on_each_symbol" | |
677 | && !strcmp(line + pos, "kallsyms_on_each_symbol" "\n")) | |
678 | { | |
679 | rc = send_a_relocation ("kernel", "kallsyms_on_each_symbol", address); | |
680 | if (rc != 0) // non fatal, follows perror() | |
681 | dbug(1, "Relocation was reloc kallsyms_on_each_symbol=%llx\n", address); | |
682 | ||
683 | found_kallsyms_on_each_symbol = 1; | |
684 | } | |
685 | else if (linesize - pos == sizeof "module_kallsyms_on_each_symbol" | |
686 | && !strcmp(line + pos, "module_kallsyms_on_each_symbol" "\n")) | |
687 | { | |
688 | rc = send_a_relocation ("kernel", "module_kallsyms_on_each_symbol", address); | |
689 | if (rc != 0) // non fatal, follows perror() | |
690 | dbug(1, "Relocation was reloc module_kallsyms_on_each_symbol=%llx\n", address); | |
691 | ||
692 | found_module_kallsyms_on_each_symbol = 1; | |
693 | } | |
694 | } | |
695 | done_with_kallsyms = found_stext | |
696 | && found_kallsyms_lookup_name | |
697 | && found_kallsyms_on_each_symbol | |
698 | && found_module_kallsyms_on_each_symbol; | |
699 | } | |
700 | free (line); | |
701 | fclose (kallsyms); | |
702 | ||
703 | /* PR26074: Arguably, failure to find the kallsyms_* symbols may | |
704 | * not be a fatal error. The fallback kallsyms_lookup_name() | |
705 | * function in sym.c then returns 0, but it's barely conceivable | |
706 | * some modules never call it. */ | |
707 | /* if (!done_with_kallsyms) */ | |
708 | if (!found_stext) | |
709 | return rc; | |
710 | ||
711 | /* detect note section, send flag if there | |
712 | * NB: address=2 represents existed note, the real one in _stp_module | |
713 | */ | |
714 | if (!access("/sys/kernel/notes", R_OK)) | |
715 | rc = send_a_relocation ("kernel", ".note.gnu.build-id", 2); | |
716 | } | |
717 | ||
718 | return rc; | |
719 | } | |
720 | ||
721 | ||
/*
 * For every /sys/module/MODULE/sections/SECTION file, read the
 * section's load address and send it to our module as a relocation.
 * Individual unreadable files and send failures are skipped.
 *
 * Returns 0 normally, or the glob() result if it failed with
 * GLOB_NOSPACE or GLOB_ABORTED.
 */
int send_relocation_modules ()
{
	glob_t globbuf;
	size_t i;
	int r;

	/* Start from a zeroed glob_t so that globfree() is safe on
	   every path below. */
	memset (&globbuf, 0, sizeof (globbuf));
	r = glob("/sys/module/*/sections/*", GLOB_PERIOD, NULL, &globbuf);

	if (r == GLOB_NOSPACE || r == GLOB_ABORTED) {
		/* Release any partial results before giving up. */
		globfree (& globbuf);
		return r;
	}

	for (i=0; i<globbuf.gl_pathc; i++) {
		char *module_section_file = globbuf.gl_pathv[i];
		char *section_name;
		char *module_name;
		char *module_name_end;
		FILE *secfile;
		unsigned long long section_address;
		int skip;

		/* Tokenize the file name.
		   Sample gl_pathv[]: /sys/module/zlib_deflate/sections/.text
		   Pieces:                        ^^^^^^^^^^^^          ^^^^^
		*/
		section_name = strrchr (module_section_file, '/');
		if (! section_name) continue;
		section_name ++;

		if (!strcmp (section_name, ".")) continue;
		if (!strcmp (section_name, "..")) continue;

		/* The module name follows the third '/' of the path. */
		module_name = module_section_file;
		for (skip = 0; skip < 3 && module_name; skip++) {
			module_name = strchr (module_name, '/');
			if (module_name)
				module_name ++;
		}
		if (! module_name) continue;

		module_name_end = strchr (module_name, '/');
		if (! module_name_end) continue;

		secfile = fopen (module_section_file, "r");
		if (! secfile) continue;

		if (1 == fscanf (secfile, "0x%llx", &section_address)) {
			/* Now we destructively modify the string, but by now the file
			   is open so we won't need the full name again. */
			*module_name_end = '\0';

			/* PR6503.  /sys/module/.../sections/...init.... sometimes contain
			   non-0 addresses, even though the respective module-initialization
			   sections were already unloaded.  We override the addresses here. */
			if (strstr (section_name, "init.") != NULL) /* .init.text, .devinit.rodata, ... */
				section_address = 0;

			(void) send_a_relocation (module_name, section_name, section_address);
			/* PR14005: take a pill, dude, a failure with an overlong
			 * name does not call for freaking out.  Nor does an error
			 * coming back from the write(2) into the module.  We will
			 * just stagger along without that particular module/section
			 * being present in the _stp_sections[] tables. */
		}

		if (strcmp (section_name, ".gnu.linkonce.this_module")) {
			fclose (secfile);
		} else {
			(void)set_clexec (fileno (secfile));
			/* NB: don't fclose this arbitrarily-chosen section file.
			   This forces the kernel to keep a nonzero reference count
			   on the subject module, until staprun exits, by which time
			   the kernel module will have inserted its separate claws
			   into the probeworthy modules.  This prevents a race
			   condition where a probe may be just starting up at the
			   same time that a probeworthy module is being unloaded. */
		}
	}

	globfree (& globbuf);
	return 0;
}
808 | ||
809 | ||
810 | ||
/*
 * Adjust kptr_restrict, then send the kernel relocations followed --
 * only if those succeeded -- by the per-module section relocations.
 * Returns 0 on success or the first nonzero result.
 */
int send_relocations ()
{
	tweak_kptr_restrict();

	int rc = send_relocation_kernel ();
	return (rc != 0) ? rc : send_relocation_modules ();
}
822 | ||
823 | ||
824 | int send_tzinfo () | |
825 | { | |
826 | struct _stp_msg_tzinfo tzi; | |
827 | time_t now_t; | |
828 | struct tm* now; | |
829 | int rc; | |
830 | ||
831 | /* NB: This is not good enough; it sends DST-unaware numbers. */ | |
832 | #if 0 | |
833 | tzset (); | |
834 | tzi.tz_gmtoff = timezone; | |
835 | strncpy (tzi.tz_name, tzname[0], STP_TZ_NAME_LEN - 1); | |
836 | #endif | |
837 | ||
838 | time (& now_t); | |
839 | now = localtime (& now_t); | |
840 | tzi.tz_gmtoff = - now->tm_gmtoff; | |
841 | strncpy (tzi.tz_name, now->tm_zone, STP_TZ_NAME_LEN - 1); | |
842 | ||
843 | rc = send_request(STP_TZINFO, & tzi, sizeof(tzi)); | |
844 | if (rc != 0) | |
845 | perror ("Unable to send time zone information"); | |
846 | return rc; | |
847 | } | |
848 | ||
849 | int send_privilege_credentials (privilege_t user_credentials) | |
850 | { | |
851 | struct _stp_msg_privilege_credentials pc; | |
852 | int rc; | |
853 | pc.pc_group_mask = user_credentials; | |
854 | rc = send_request(STP_PRIVILEGE_CREDENTIALS, & pc, sizeof(pc)); | |
855 | if (rc != 0) { | |
856 | /* Not an error. Happens when pre 1.7 modules are loaded. */ | |
857 | dbug (1, "Unable to send user privilege credentials\n"); | |
858 | } | |
859 | return rc; | |
860 | } | |
861 | ||
862 | int send_remote_id () | |
863 | { | |
864 | struct _stp_msg_remote_id rem; | |
865 | int rc; | |
866 | ||
867 | rem.remote_id = remote_id; | |
868 | strncpy (rem.remote_uri, remote_uri, STP_REMOTE_URI_LEN - 1); | |
869 | rem.remote_uri [STP_REMOTE_URI_LEN-1]='\0'; /* XXX: quietly truncate */ | |
870 | rc = send_request(STP_REMOTE_ID, & rem, sizeof(rem)); | |
871 | if (rc != 0) | |
872 | perror ("Unable to send remote id"); | |
873 | return rc; | |
874 | } |