]>
Commit | Line | Data |
---|---|---|
a846e9cd MH |
1 | /* -*- linux-c -*- |
2 | * | |
337cd273 | 3 | * staprun.c - SystemTap module loader |
a846e9cd | 4 | * |
82fc46ce | 5 | * Copyright (C) 2005-2019 Red Hat, Inc. |
aa2b3583 | 6 | * |
a846e9cd MH |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
e8daaf60 | 18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
a846e9cd | 19 | * |
a846e9cd MH |
20 | */ |
21 | ||
d62c7736 FCE |
22 | #define _XOPEN_SOURCE |
23 | #define _BSD_SOURCE | |
c6278d01 | 24 | #define _DEFAULT_SOURCE |
a846e9cd | 25 | #include "staprun.h" |
02311ab4 | 26 | #include "../privilege.h" |
ea15e536 | 27 | #include "../runtime/k_syms.h" |
6bedc77b | 28 | #include <string.h> |
37ddf6e5 | 29 | #include <sys/uio.h> |
df00639d | 30 | #include <glob.h> |
d62c7736 | 31 | #include <time.h> |
a399ff28 | 32 | #include <unistd.h> |
34d9471d | 33 | #include <sys/prctl.h> |
9eb83dc6 | 34 | #include <sys/utsname.h> |
5eddf13b | 35 | |
5eddf13b DS |
36 | /* used in dbug, _err and _perr */ |
37 | char *__name__ = "staprun"; | |
38 | ||
39 | extern long delete_module(const char *, unsigned int); | |
40 | ||
37ddf6e5 | 41 | int send_relocations (); |
933e53b0 | 42 | int send_tzinfo (); |
429a4963 | 43 | int send_privilege_credentials (privilege_t user_credentials); |
933e53b0 | 44 | int send_remote_id (); |
37ddf6e5 | 45 | |
cedf63f3 DS |
46 | static int remove_module(const char *name, int verb); |
47 | ||
48 | static int stap_module_inserted = -1; | |
49 | ||
50 | static void term_signal_handler(int signum __attribute ((unused))) | |
51 | { | |
52 | if (stap_module_inserted == 0) { | |
cedf63f3 DS |
53 | remove_module(modname, 1); |
54 | free(modname); | |
55 | } | |
56 | _exit(1); | |
57 | } | |
58 | ||
59 | void setup_term_signals(void) | |
60 | { | |
61 | sigset_t s; | |
62 | struct sigaction a; | |
63 | ||
64 | /* blocking all signals while we set things up */ | |
65 | sigfillset(&s); | |
66 | sigprocmask(SIG_SETMASK, &s, NULL); | |
67 | ||
68 | /* handle signals */ | |
69 | memset(&a, 0, sizeof(a)); | |
70 | sigfillset(&a.sa_mask); | |
71 | a.sa_handler = term_signal_handler; | |
72 | sigaction(SIGHUP, &a, NULL); | |
73 | sigaction(SIGINT, &a, NULL); | |
74 | sigaction(SIGTERM, &a, NULL); | |
75 | sigaction(SIGQUIT, &a, NULL); | |
76 | ||
77 | /* unblock all signals */ | |
78 | sigemptyset(&s); | |
79 | sigprocmask(SIG_SETMASK, &s, NULL); | |
80 | } | |
37ddf6e5 | 81 | |
337cd273 | 82 | static int run_as(int exec_p, uid_t uid, gid_t gid, const char *path, char *const argv[]) |
5d65678d | 83 | { |
5eddf13b DS |
84 | pid_t pid; |
85 | int rstatus; | |
5d65678d | 86 | |
6274464e JK |
87 | if (verbose >= 2) { |
88 | int i = 0; | |
11bfd0cc | 89 | eprintf(exec_p ? "execing: ": "spawning: "); |
6274464e | 90 | while (argv[i]) { |
11bfd0cc | 91 | eprintf("%s ", argv[i]); |
6274464e JK |
92 | i++; |
93 | } | |
11bfd0cc | 94 | eprintf("\n"); |
6274464e JK |
95 | } |
96 | ||
337cd273 FCE |
97 | if (exec_p) |
98 | pid = 0; | |
99 | else | |
100 | pid = fork(); | |
101 | ||
102 | if (pid < 0) | |
103 | { | |
104 | _perr("fork"); | |
105 | return -1; | |
106 | } | |
107 | ||
108 | if (pid == 0) /* child process, or exec_p */ | |
109 | { | |
110 | /* Make sure we run as the full user. If we're | |
111 | * switching to a non-root user, this won't allow | |
112 | * that process to switch back to root (since the | |
113 | * original process is setuid). */ | |
114 | if (setresgid (gid, gid, gid) < 0) { | |
115 | _perr("setresgid"); | |
116 | exit(1); | |
117 | } | |
118 | if (setresuid (uid, uid, uid) < 0) { | |
119 | _perr("setresuid"); | |
120 | exit(1); | |
121 | } | |
5eddf13b | 122 | |
337cd273 FCE |
123 | /* Actually run the command. */ |
124 | if (execv(path, argv) < 0) | |
125 | perror(path); | |
126 | _exit(1); | |
127 | } | |
5eddf13b DS |
128 | |
129 | if (waitpid(pid, &rstatus, 0) < 0) | |
337cd273 | 130 | return -1; |
5eddf13b DS |
131 | |
132 | if (WIFEXITED(rstatus)) | |
337cd273 | 133 | return WEXITSTATUS(rstatus); |
5eddf13b DS |
134 | return -1; |
135 | } | |
136 | ||
6274464e JK |
137 | /* |
138 | * Module to be inserted has one or more user-space probes. Make sure | |
139 | * uprobes is enabled. | |
140 | * If /proc/kallsyms lists a symbol in uprobes (e.g. unregister_uprobe), | |
141 | * we're done. | |
142 | * Else try "modprobe uprobes" to load the uprobes module (if any) | |
143 | * built with the kernel. | |
144 | * If that fails, load the uprobes module built in runtime/uprobes. | |
145 | */ | |
146 | static int enable_uprobes(void) | |
147 | { | |
6274464e | 148 | char *argv[10]; |
73dc0c77 | 149 | char runtimeko[2048]; |
a53e79a9 FCE |
150 | int rc; |
151 | ||
152 | /* Formerly, we did a grep /proc/kallsyms search to see if | |
153 | uprobes was already loaded into the kernel. But this is | |
154 | a race waiting to happen. Just try to load the thing. | |
155 | Quietly accept a -EEXIST error. */ | |
6274464e | 156 | |
b7565b41 | 157 | /* NB: don't use /sbin/modprobe, without more env. sanitation. */ |
6274464e | 158 | |
474d17ad DB |
159 | /* Try the specified module or the one from the runtime. */ |
160 | if (uprobes_path) | |
161 | snprintf (runtimeko, sizeof(runtimeko), "%s", uprobes_path); | |
162 | else | |
a53e79a9 FCE |
163 | /* NB: since PR5163, share/runtime/uprobes/uprobes.ko is not built |
164 | by systemtap. */ | |
474d17ad DB |
165 | snprintf (runtimeko, sizeof(runtimeko), "%s/uprobes/uprobes.ko", |
166 | (getenv("SYSTEMTAP_RUNTIME") ?: PKGDATADIR "/runtime")); | |
167 | dbug(2, "Inserting uprobes module from %s.\n", runtimeko); | |
64211010 | 168 | /* This module may be signed, so use insert_module to load it. */ |
64211010 | 169 | argv[0] = NULL; |
a53e79a9 | 170 | |
429a4963 | 171 | rc = insert_module(runtimeko, NULL, argv, assert_uprobes_module_permissions, NULL); |
a53e79a9 FCE |
172 | if ((rc == 0) || /* OK */ |
173 | (rc == -EEXIST)) /* Someone else might have loaded it */ | |
770e94e8 | 174 | return 0; |
73dc0c77 | 175 | |
11bfd0cc | 176 | err("Couldn't insert module '%s': %s\n", runtimeko, moderror(errno)); |
770e94e8 | 177 | return 1; /* failure */ |
6274464e JK |
178 | } |
179 | ||
429a4963 | 180 | static int insert_stap_module(privilege_t *user_credentials) |
6274464e | 181 | { |
a2422e70 | 182 | char special_options[128]; |
710f5084 FCE |
183 | int rc, fips_mode_fd; |
184 | char fips_mode = '0'; | |
185 | char *misc = ""; | |
c94a9cb3 | 186 | |
5ffdc7b0 | 187 | /* Add the _stp_bufsize option. */ |
f2013cc9 | 188 | if (snprintf_chk(special_options, sizeof (special_options), |
f245e619 | 189 | "_stp_bufsize=%d", (int)buffer_size)) |
6274464e | 190 | return -1; |
a2422e70 | 191 | |
710f5084 FCE |
192 | fips_mode_fd = open("/proc/sys/crypto/fips_enabled", O_RDONLY); |
193 | if (fips_mode_fd >= 0) { | |
194 | char c; | |
195 | rc = read(fips_mode_fd, &c, 1); | |
196 | if (rc == 1) fips_mode = c; | |
197 | close (fips_mode_fd); | |
198 | } | |
199 | ||
200 | /* In FIPS mode, a kernel may panic if given an improperly-signed module. | |
201 | Right now, we have no way of signing them with the kernel build-time keys, | |
202 | so we punt. See also SecureBoot. */ | |
203 | if ((fips_mode != '0') && !getenv("STAP_FIPS_OVERRIDE")) { | |
204 | errno = EPERM; | |
205 | stap_module_inserted = -1; | |
206 | misc = "in FIPS mode "; | |
207 | } else { | |
208 | stap_module_inserted = insert_module(modpath, special_options, | |
209 | modoptions, | |
210 | assert_stap_module_permissions, | |
211 | user_credentials); | |
212 | } | |
a53e79a9 | 213 | if (stap_module_inserted != 0) |
710f5084 | 214 | err("Couldn't insert module %s'%s': %s\n", misc, modpath, moderror(errno)); |
cedf63f3 | 215 | return stap_module_inserted; |
6274464e JK |
216 | } |
217 | ||
b197bf0b MH |
218 | static void remove_all_modules(void) |
219 | { | |
220 | char *base; | |
221 | struct statfs st; | |
222 | struct dirent *d; | |
223 | DIR *moddir; | |
224 | ||
c5f7c84b | 225 | /* NB: nothing to do with PR14245 */ |
b197bf0b MH |
226 | if (statfs("/sys/kernel/debug", &st) == 0 && (int)st.f_type == (int)DEBUGFS_MAGIC) |
227 | base = "/sys/kernel/debug/systemtap"; | |
228 | else | |
229 | base = "/proc/systemtap"; | |
230 | ||
231 | moddir = opendir(base); | |
232 | if (moddir) { | |
14a09f02 FCE |
233 | while ((d = readdir(moddir))) { |
234 | if (strcmp(d->d_name, ".") == 0) continue; | |
235 | if (strcmp(d->d_name, "..") == 0) continue; | |
236 | relay_basedir_fd = -1; /* each time! */ | |
b197bf0b MH |
237 | if (remove_module(d->d_name, 0) == 0) |
238 | printf("Module %s removed.\n", d->d_name); | |
14a09f02 | 239 | } |
b197bf0b MH |
240 | closedir(moddir); |
241 | } | |
242 | } | |
243 | ||
244 | static int remove_module(const char *name, int verb) | |
245 | { | |
a399ff28 YZ |
246 | int i, ret; |
247 | enum { | |
248 | MAX_EINTR_TRIES = 5 | |
249 | }; | |
250 | ||
b197bf0b MH |
251 | dbug(2, "%s\n", name); |
252 | ||
4870be66 | 253 | #ifdef PR_SET_NAME |
34d9471d FCE |
254 | /* Make self easier to identify in vmcrash images */ |
255 | prctl (PR_SET_NAME, "staprun-d"); | |
4870be66 | 256 | #endif |
34d9471d | 257 | |
c4ca2da6 FCE |
258 | (void) verb; /* XXX: ignore */ |
259 | ||
b197bf0b MH |
260 | if (strcmp(name, "*") == 0) { |
261 | remove_all_modules(); | |
262 | return 0; | |
263 | } | |
264 | ||
b7565b41 FCE |
265 | /* We call init_ctl_channel/close_ctl_channel to check whether |
266 | the module is a systemtap-built one (having the right files), | |
267 | and that it's already unattached (because otherwise it'd EBUSY | |
14a09f02 FCE |
268 | the opens, and that it belongs to our uid (because otherwise |
269 | a faccessat(2) test on the .cmd file will fail). */ | |
b7565b41 FCE |
270 | ret = init_ctl_channel (name, 0); |
271 | if (ret < 0) { | |
11bfd0cc | 272 | err("'%s' is not a zombie systemtap module.\n", name); |
b7565b41 FCE |
273 | return ret; |
274 | } | |
275 | close_ctl_channel (); | |
b197bf0b MH |
276 | |
277 | dbug(2, "removing module %s\n", name); | |
0f5d597d | 278 | PROBE1(staprun, remove__module, name); |
a399ff28 YZ |
279 | |
280 | for (i = 0; i < MAX_EINTR_TRIES; i++) { | |
281 | ret = delete_module (name, O_NONBLOCK); | |
166a9508 | 282 | if (ret == 0 || (errno != EINTR && errno != EWOULDBLOCK)) |
a399ff28 YZ |
283 | break; |
284 | usleep(100 * i); | |
285 | } | |
286 | ||
b197bf0b | 287 | if (ret != 0) { |
11bfd0cc | 288 | err("Couldn't remove module '%s': %s.\n", name, strerror(errno)); |
b197bf0b MH |
289 | return 1; |
290 | } | |
291 | ||
292 | dbug(1, "Module %s removed.\n", name); | |
293 | return 0; | |
294 | } | |
5eddf13b | 295 | |
cd125e94 | 296 | |
91872f52 FCE |
/*
 * As per PR13193 & PR1548, some kernels have buggy kprobes-optimization
 * code, which results in BUG/panics in certain circumstances.  As a
 * conservative measure, write '0' to
 * /proc/sys/debug/kprobes-optimization unless it already reads '0',
 * the file is unavailable, or STAP_PR13193_OVERRIDE is set in the
 * environment.
 *
 * The two kernel-version exemptions below are deliberately disabled
 * (their conditions start with a constant 0):
 *  - PR13814 exempted kernels containing fix commits 86b4ce31 46484688
 *    3f33ab1c, but PR15484 showed the problem on a Debian 3.8.12 kernel.
 *  - PR24416 / RHBZ1697531: RCU hangs seen with kprobe optimization on
 *    an F29 5.0 kernel.
 */
void disable_kprobes_optimization()
{
	const char* proc_kprobes = "/proc/sys/debug/kprobes-optimization";
	struct utsname uts;
	char state;
	int fd;
	int nbytes;

	if (0 && (uname (&uts) == 0) && (strverscmp (uts.release, "3.4") >= 0))
		return;
	if ((0 && uname (&uts) == 0) && (strverscmp (uts.release, "4.8") >= 0))
		return;

	if (getenv ("STAP_PR13193_OVERRIDE"))
		return;

	/* Look at the current setting first; opening also tells us
	   whether the file exists at all. */
	fd = open (proc_kprobes, O_RDONLY);
	if (fd < 0)
		return;
	nbytes = read (fd, &state, sizeof(state));
	(void) close (fd);
	if (nbytes < 1 || state == '0') /* Already disabled or unavailable */
		return;

	fd = open (proc_kprobes, O_WRONLY);
	if (fd < 0)
		return;
	state = '0';
	nbytes = write (fd, &state, sizeof(state));
	(void) close (fd);

	if (nbytes != 1) {
		dbug(1, "Error %d/%d disabling %s.\n", nbytes, errno, proc_kprobes);
		return;
	}
	dbug(1, "Disabled %s.\n", proc_kprobes);
}
345 | ||
346 | ||
b3aeaf02 FCE |
/*
 * BZ1552745: /proc/sys/kernel/kptr_restrict makes /sys/module ...
 * addresses unreliable on 2018+ kernels.  We tweak this security
 * measure (setting it to '1'), unless told otherwise by the
 * STAP_BZ1552745_OVERRIDE environment variable.  We could turn it back
 * later, but this would create a race condition between concurrent
 * runs of staprun.  The '1' setting is nominally more secure than the
 * default '0', except that for /sys/module/$MODULE/sections/$SECTION
 * the '0' case produces obfuscated 0-based pointers, and '1' produces
 * good ones (to a root user).  Strange but true.
 *
 * Nothing is done on kernels whose release sorts before "4.15", when
 * the file cannot be opened or read, or when it already reads '1'.
 */
void tweak_kptr_restrict()
{
	const char* proc_kptr = "/proc/sys/kernel/kptr_restrict";
	struct utsname uts;
	char state;
	int fd;
	int nbytes;

	/* Relevant change appears to have been introduced in v4.15 in
	 * commit ef0010a30935de4e0211. */
	if ((uname (&uts) == 0) && (strverscmp (uts.release, "4.15") < 0))
		return;

	if (getenv ("STAP_BZ1552745_OVERRIDE"))
		return;

	/* Look at the current setting first. */
	fd = open (proc_kptr, O_RDONLY);
	if (fd < 0)
		return;
	nbytes = read (fd, &state, sizeof(state));
	(void) close (fd);
	if (nbytes < 1 || state == '1') /* Already set or unavailable */
		return;

	fd = open (proc_kptr, O_WRONLY);
	if (fd < 0)
		return;
	state = '1';
	nbytes = write (fd, &state, sizeof(state));
	(void) close (fd);

	if (nbytes != 1) {
		dbug(1, "Error %d/%d setting %s.\n", nbytes, errno, proc_kptr);
		return;
	}
	dbug(1, "Set %s.\n", proc_kptr);
}
392 | ||
393 | ||
394 | ||
5eddf13b DS |
395 | int init_staprun(void) |
396 | { | |
3c10a1be | 397 | privilege_t user_credentials = pr_unknown; |
5ffdc7b0 | 398 | int rc; |
5eddf13b DS |
399 | dbug(2, "init_staprun\n"); |
400 | ||
401 | if (mountfs() < 0) | |
402 | return -1; | |
403 | ||
5ffdc7b0 | 404 | rc = 0; |
b197bf0b MH |
405 | if (delete_mod) |
406 | exit(remove_module(modname, 1)); | |
00d577a6 FCE |
407 | if (attach_mod) { |
408 | /* PR14245: prime the relay_basedir_fd pump. */ | |
409 | rc = init_ctl_channel (modname, 0); | |
410 | if (rc >= 0) | |
411 | close_ctl_channel (); | |
412 | } else /* if (!attach_mod) */ { | |
6274464e JK |
413 | if (need_uprobes && enable_uprobes() != 0) |
414 | return -1; | |
cd125e94 FCE |
415 | |
416 | disable_kprobes_optimization(); | |
417 | ||
429a4963 | 418 | if (insert_stap_module(& user_credentials) < 0) { |
5c854d7c CM |
419 | if(!rename_mod && errno == EEXIST) |
420 | err("Rerun with staprun option '-R' to rename this module.\n"); | |
2155081e CW |
421 | return -1; |
422 | } | |
5ffdc7b0 | 423 | rc = init_ctl_channel (modname, 0); |
d0297590 | 424 | if (rc >= 0) { |
933e53b0 DB |
425 | /* If we are unable to send privilege credentials then we have an old |
426 | (pre 1.7) stap module or a non-stap module. In either case, the privilege | |
427 | credentials required for loading the module have already been determined and | |
428 | checked (see check_groups, get_module_required_credentials). | |
429 | */ | |
429a4963 | 430 | send_privilege_credentials(user_credentials); |
5ffdc7b0 | 431 | rc = send_relocations(); |
933e53b0 DB |
432 | if (rc == 0) { |
433 | rc = send_tzinfo(); | |
434 | if (rc == 0 && remote_id >= 0) | |
435 | send_remote_id(); | |
5ffdc7b0 DB |
436 | } |
437 | close_ctl_channel (); | |
438 | } | |
d0297590 DB |
439 | if (rc != 0) |
440 | remove_module(modname, 1); | |
5eddf13b | 441 | } |
5ffdc7b0 | 442 | return rc; |
5eddf13b | 443 | } |
a846e9cd MH |
444 | |
445 | int main(int argc, char **argv) | |
446 | { | |
5eddf13b | 447 | int rc; |
83c5b5fe | 448 | |
3df93d4a SA |
449 | /* Force libc to make our stderr messages atomic by enabling line |
450 | buffering since stderr is unbuffered by default. Without this, libc | |
451 | is at liberty to split a single stderr message into multiple writes | |
452 | to the fd while holding flockfile(stderr). POSIX only guarantees that | |
453 | a single write(2) is atomic; chaining several write(2) calls together | |
454 | won't be atomic, and we don't want libc to do that within a single | |
455 | *fprintf(stderr) call since it'll mangle messages printed across | |
456 | different processes (*not* threads). */ | |
457 | setlinebuf(stderr); | |
458 | ||
b197bf0b MH |
459 | /* NB: Don't do the geteuid()!=0 check here, since we want to |
460 | test command-line error-handling while running non-root. */ | |
5eddf13b DS |
461 | /* Get rid of a few standard environment variables (which */ |
462 | /* might cause us to do unintended things). */ | |
463 | rc = unsetenv("IFS") || unsetenv("CDPATH") || unsetenv("ENV") | |
b197bf0b | 464 | || unsetenv("BASH_ENV"); |
5eddf13b DS |
465 | if (rc) { |
466 | _perr("unsetenv failed"); | |
467 | exit(-1); | |
a846e9cd MH |
468 | } |
469 | ||
73dc0c77 | 470 | if (getuid() != geteuid()) { /* setuid? */ |
aad1a79c | 471 | rc = unsetenv("SYSTEMTAP_STAPRUN") || |
73dc0c77 FCE |
472 | unsetenv("SYSTEMTAP_STAPIO") || |
473 | unsetenv("SYSTEMTAP_RUNTIME"); | |
474 | ||
aad1a79c RM |
475 | if (rc) { |
476 | _perr("unsetenv failed"); | |
477 | exit(-1); | |
478 | } | |
479 | } | |
480 | ||
5eddf13b | 481 | setup_signals(); |
cedf63f3 | 482 | setup_term_signals(); |
5eddf13b DS |
483 | |
484 | parse_args(argc, argv); | |
485 | ||
c5f7c84b FCE |
486 | /* PR14245, For security reasons, preclude "staprun -F fd". |
487 | The -F option is only for stapio, but the overzealous quest | |
488 | for commonality doesn't let us express that nicer. */ | |
489 | if (relay_basedir_fd >= 0) { | |
11bfd0cc | 490 | err(_("Relay basedir -F option is invalid for staprun\n")); |
c5f7c84b FCE |
491 | exit(1); |
492 | } | |
493 | /* NB: later on, some of our own code may set relay_basedir_fd, for | |
494 | passing onto stapio - or for our own reuse. That's OK. */ | |
495 | ||
496 | ||
5eddf13b | 497 | if (buffer_size) |
3b5ab982 | 498 | dbug(2, "Using a buffer of %u MB.\n", buffer_size); |
5eddf13b | 499 | |
5c854d7c | 500 | int mod_optind = optind; |
a846e9cd | 501 | if (optind < argc) { |
5eddf13b | 502 | parse_modpath(argv[optind++]); |
83c5b5fe | 503 | dbug(2, "modpath=\"%s\", modname=\"%s\"\n", modpath, modname); |
a846e9cd MH |
504 | } |
505 | ||
b197bf0b | 506 | if (optind < argc) { |
5d65678d | 507 | if (attach_mod) { |
11bfd0cc | 508 | err("Cannot have module options with attach (-A).\n"); |
02aa7a65 | 509 | usage(argv[0],1); |
5d65678d | 510 | } else { |
5eddf13b | 511 | unsigned start_idx = 0; |
b197bf0b | 512 | while (optind < argc && start_idx + 1 < MAXMODOPTIONS) |
5d65678d MH |
513 | modoptions[start_idx++] = argv[optind++]; |
514 | modoptions[start_idx] = NULL; | |
515 | } | |
a846e9cd MH |
516 | } |
517 | ||
5eddf13b | 518 | if (modpath == NULL || *modpath == '\0') { |
11bfd0cc | 519 | err("Need a module name or path to load.\n"); |
02aa7a65 | 520 | usage(argv[0],1); |
a846e9cd MH |
521 | } |
522 | ||
3a9627f6 | 523 | if (geteuid() != 0) { |
11bfd0cc | 524 | err("The effective user ID of staprun must be set to the root user.\n" |
b197bf0b | 525 | " Check permissions on staprun and ensure it is a setuid root program.\n"); |
3a9627f6 FCE |
526 | exit(1); |
527 | } | |
528 | ||
c31d198c LB |
529 | char verbose_level[33]; |
530 | sprintf(verbose_level, "%d", verbose); | |
531 | rc = setenv("SYSTEMTAP_VERBOSE", verbose_level, 0); | |
532 | if (rc) { | |
533 | _perr("SYSTEMTAP_VERBOSE setenv failed"); | |
534 | exit(-1); | |
535 | } | |
536 | ||
5d65678d | 537 | if (init_staprun()) |
a846e9cd | 538 | exit(1); |
a846e9cd | 539 | |
aad1a79c | 540 | argv[0] = getenv ("SYSTEMTAP_STAPIO") ?: PKGLIBDIR "/stapio"; |
5c854d7c CM |
541 | |
542 | /* Copy nenamed modname into argv */ | |
543 | if(rename_mod) | |
544 | argv[mod_optind] = modname; | |
545 | ||
c5f7c84b FCE |
546 | /* PR14245: pass -F fd to stapio. Unfortunately, this requires |
547 | us to extend argv[], with all the C fun that entails. */ | |
548 | #ifdef HAVE_OPENAT | |
549 | if (relay_basedir_fd >= 0) { | |
52596f02 | 550 | char ** new_argv = calloc(argc+2, sizeof(char *)); |
c5f7c84b FCE |
551 | const int new_Foption_size = 10; /* -FNNNNN */ |
552 | char * new_Foption = malloc(new_Foption_size); | |
553 | int i; | |
554 | ||
555 | if (new_argv && new_Foption) { | |
556 | snprintf (new_Foption, new_Foption_size, "-F%d", relay_basedir_fd); | |
684925aa | 557 | for (i=0; i < argc && argv[i] != NULL; i++) |
c5f7c84b FCE |
558 | new_argv[i] = argv[i]; |
559 | new_argv[i++] = new_Foption; /* overwrite the NULL */ | |
560 | new_argv[i++] = NULL; /* ensconce a new NULL */ | |
561 | ||
562 | argv = new_argv; | |
563 | } | |
564 | } | |
565 | #endif | |
566 | ||
5c854d7c | 567 | /* Run stapio */ |
337cd273 | 568 | if (run_as (1, getuid(), getgid(), argv[0], argv) < 0) { |
b197bf0b MH |
569 | perror(argv[0]); |
570 | goto err; | |
571 | } | |
5c854d7c CM |
572 | |
573 | free(modname); | |
a846e9cd | 574 | return 0; |
b197bf0b MH |
575 | |
576 | err: | |
577 | remove_module(modname, 1); | |
5c854d7c | 578 | free(modname); |
b197bf0b | 579 | return 1; |
a846e9cd | 580 | } |
37ddf6e5 FCE |
581 | |
582 | ||
583 | ||
584 | /* Send a variety of relocation-related data to the kernel: for the | |
585 | kernel proper, just the "_stext" symbol address; for all loaded | |
586 | modules, a variety of symbol base addresses. | |
587 | ||
588 | We do this under protest. The kernel ought expose this data to | |
589 | modules such as ourselves, but instead the upstream community | |
590 | continually shrinks its module-facing interfaces, including this | |
591 | stuff, even when users exist. | |
1eaeb16d SM |
592 | |
593 | PR26074: as of kernel 5.7+ / commit 0bd476e6c671 and under further | |
594 | protest, we must also send the address of kallsyms_lookup_name and | |
595 | kallsyms_on_each_symbol. | |
37ddf6e5 FCE |
596 | */ |
597 | ||
598 | ||
933e53b0 | 599 | int send_a_relocation (const char* module, const char* reloc, unsigned long long address) |
37ddf6e5 FCE |
600 | { |
601 | struct _stp_msg_relocation msg; | |
933e53b0 | 602 | int rc; |
37ddf6e5 | 603 | |
7c7b1cd5 | 604 | if (strlen(module) >= STP_MODULE_NAME_LEN-1) { |
adc149b7 | 605 | dbug (1, "module name too long: %s\n", module); |
933e53b0 | 606 | return -EINVAL; |
7c7b1cd5 | 607 | } |
d6c9d87f | 608 | strncpy (msg.module, module, STP_MODULE_NAME_LEN - 1); |
7c7b1cd5 FCE |
609 | |
610 | if (strlen(reloc) >= STP_SYMBOL_NAME_LEN-1) { | |
adc149b7 | 611 | dbug (1, "reloc name too long: %s\n", reloc); |
933e53b0 | 612 | return -EINVAL; |
7c7b1cd5 | 613 | } |
d6c9d87f | 614 | strncpy (msg.reloc, reloc, STP_MODULE_NAME_LEN - 1); |
37ddf6e5 FCE |
615 | |
616 | msg.address = address; | |
617 | ||
933e53b0 DB |
618 | rc = send_request (STP_RELOCATION, & msg, sizeof (msg)); |
619 | if (rc != 0) | |
620 | perror ("Unable to send relocation"); | |
621 | return rc; | |
37ddf6e5 FCE |
622 | } |
623 | ||
624 | ||
625 | int send_relocation_kernel () | |
626 | { | |
933e53b0 DB |
627 | FILE* kallsyms; |
628 | int rc = 0; | |
629 | ||
630 | errno = 0; | |
631 | kallsyms = fopen ("/proc/kallsyms", "r"); | |
37ddf6e5 FCE |
632 | if (kallsyms == NULL) |
633 | { | |
337cd273 | 634 | perror("cannot open /proc/kallsyms"); |
37ddf6e5 | 635 | // ... and the kernel module will almost certainly fail to initialize. |
933e53b0 | 636 | return errno; |
37ddf6e5 FCE |
637 | } |
638 | else | |
639 | { | |
1eaeb16d SM |
640 | int found_stext = 0; |
641 | int found_kallsyms_lookup_name = 0; | |
642 | int found_kallsyms_on_each_symbol = 0; | |
33fae2d0 | 643 | int found_module_kallsyms_on_each_symbol = 0; |
df00639d | 644 | int done_with_kallsyms = 0; |
aa2b3583 RM |
645 | char *line = NULL; |
646 | size_t linesz = 0; | |
df00639d | 647 | while (! feof(kallsyms) && !done_with_kallsyms) |
37ddf6e5 | 648 | { |
37ddf6e5 | 649 | ssize_t linesize = getline (& line, & linesz, kallsyms); |
aa2b3583 | 650 | if (linesize > 0) |
37ddf6e5 FCE |
651 | { |
652 | unsigned long long address; | |
aa2b3583 | 653 | int pos = -1; |
1eaeb16d SM |
654 | if (sscanf (line, "%llx %*c %n", &address, &pos) != 1 |
655 | || pos == -1) | |
656 | continue; // no symbols here | |
657 | if (linesize - pos == sizeof KERNEL_RELOC_SYMBOL | |
aa2b3583 | 658 | && !strcmp(line + pos, KERNEL_RELOC_SYMBOL "\n")) |
37ddf6e5 | 659 | { |
1b94bf6d | 660 | /* NB: even on ppc, we use the _stext relocation name. */ |
933e53b0 DB |
661 | rc = send_a_relocation ("kernel", "_stext", address); |
662 | if (rc != 0) | |
663 | break; | |
37ddf6e5 | 664 | |
1eaeb16d SM |
665 | found_stext=1; |
666 | } | |
667 | else if (linesize - pos == sizeof "kallsyms_lookup_name" | |
668 | && !strcmp(line + pos, "kallsyms_lookup_name" "\n")) | |
669 | { | |
670 | rc = send_a_relocation ("kernel", "kallsyms_lookup_name", address); | |
671 | if (rc != 0) // non fatal, follows perror() | |
672 | dbug(1, "Relocation was kallsyms_lookup_name=%llx\n", address); | |
673 | ||
674 | found_kallsyms_lookup_name = 1; | |
675 | } | |
676 | else if (linesize - pos == sizeof "kallsyms_on_each_symbol" | |
677 | && !strcmp(line + pos, "kallsyms_on_each_symbol" "\n")) | |
678 | { | |
679 | rc = send_a_relocation ("kernel", "kallsyms_on_each_symbol", address); | |
680 | if (rc != 0) // non fatal, follows perror() | |
681 | dbug(1, "Relocation was reloc kallsyms_on_each_symbol=%llx\n", address); | |
682 | ||
683 | found_kallsyms_on_each_symbol = 1; | |
37ddf6e5 | 684 | } |
33fae2d0 MC |
685 | else if (linesize - pos == sizeof "module_kallsyms_on_each_symbol" |
686 | && !strcmp(line + pos, "module_kallsyms_on_each_symbol" "\n")) | |
687 | { | |
688 | rc = send_a_relocation ("kernel", "module_kallsyms_on_each_symbol", address); | |
689 | if (rc != 0) // non fatal, follows perror() | |
690 | dbug(1, "Relocation was reloc module_kallsyms_on_each_symbol=%llx\n", address); | |
691 | ||
692 | found_module_kallsyms_on_each_symbol = 1; | |
693 | } | |
37ddf6e5 | 694 | } |
1eaeb16d SM |
695 | done_with_kallsyms = found_stext |
696 | && found_kallsyms_lookup_name | |
33fae2d0 MC |
697 | && found_kallsyms_on_each_symbol |
698 | && found_module_kallsyms_on_each_symbol; | |
37ddf6e5 | 699 | } |
aa2b3583 | 700 | free (line); |
37ddf6e5 | 701 | fclose (kallsyms); |
1eaeb16d SM |
702 | |
703 | /* PR26074: Arguably, failure to find the kallsyms_* symbols may | |
704 | * not be a fatal error. The fallback kallsyms_lookup_name() | |
705 | * function in sym.c then returns 0, but it's barely conceivable | |
706 | * some modules never call it. */ | |
707 | /* if (!done_with_kallsyms) */ | |
708 | if (!found_stext) | |
709 | return rc; | |
aa2b3583 RM |
710 | |
711 | /* detect note section, send flag if there | |
29495972 | 712 | * NB: address=2 represents existed note, the real one in _stp_module |
aa2b3583 RM |
713 | */ |
714 | if (!access("/sys/kernel/notes", R_OK)) | |
933e53b0 | 715 | rc = send_a_relocation ("kernel", ".note.gnu.build-id", 2); |
37ddf6e5 | 716 | } |
df00639d | 717 | |
933e53b0 | 718 | return rc; |
37ddf6e5 FCE |
719 | } |
720 | ||
721 | ||
/*
 * Send the section base addresses of all loaded kernel modules, as
 * published in the per-module "sections" directories under
 * /sys/module.
 *
 * Individual sections that cannot be opened, parsed or sent are
 * skipped silently.  Returns the glob() error code if the directory
 * scan fails with GLOB_NOSPACE or GLOB_ABORTED, otherwise 0.
 */
int send_relocation_modules ()
{
  unsigned i;
  glob_t globbuf;
  int r;

  /* Zero the whole structure so that globfree() is safe to call no
     matter how far glob() got. */
  memset (& globbuf, 0, sizeof (globbuf));
  r = glob("/sys/module/*/sections/*", GLOB_PERIOD, NULL, &globbuf);

  if (r == GLOB_NOSPACE || r == GLOB_ABORTED)
    {
      /* Release whatever was matched before the failure. */
      globfree (& globbuf);
      return r;
    }

  for (i=0; i<globbuf.gl_pathc; i++)
    {
      char *module_section_file;
      char *section_name;
      char *module_name;
      char *module_name_end;
      FILE* secfile;
      unsigned long long section_address;

      module_section_file = globbuf.gl_pathv[i];

      /* Tokenize the file name.
         Sample gl_pathv[]: /sys/module/zlib_deflate/sections/.text
         Pieces:                        ^^^^^^^^^^^^          ^^^^^
      */
      section_name = strrchr (module_section_file, '/');
      if (! section_name) continue;
      section_name ++;

      if (!strcmp (section_name, ".")) continue;
      if (!strcmp (section_name, "..")) continue;

      /* Skip three slashes to land on the module name. */
      module_name = strchr (module_section_file, '/');
      if (! module_name) continue;
      module_name ++;
      module_name = strchr (module_name, '/');
      if (! module_name) continue;
      module_name ++;
      module_name = strchr (module_name, '/');
      if (! module_name) continue;
      module_name ++;

      module_name_end = strchr (module_name, '/');
      if (! module_name_end) continue;

      secfile = fopen (module_section_file, "r");
      if (! secfile) continue;

      if (1 == fscanf (secfile, "0x%llx", &section_address))
        {
          /* Now we destructively modify the string, but by now the file
             is open so we won't need the full name again. */
          *module_name_end = '\0';

          /* PR6503.  /sys/module/.../sections/...init.... sometimes contain
             non-0 addresses, even though the respective module-initialization
             sections were already unloaded.  We override the addresses here. */
          if (strstr (section_name, "init.") != NULL) /* .init.text, .devinit.rodata, ... */
            section_address = 0;

          (void) send_a_relocation (module_name, section_name, section_address);
          /* PR14005: take a pill, dude, a failure with an overlong
           * name does not call for freaking out.  Nor does an error
           * coming back from the write(2) into the module.  We will
           * just stagger along without that particular module/section
           * being present in the _stp_sections[] tables. */
        }

      if (strcmp (section_name, ".gnu.linkonce.this_module"))
        fclose (secfile);
      else
        {
          (void)set_clexec (fileno (secfile));
          /* NB: don't fclose this arbitrarily-chosen section file.
             This forces the kernel to keep a nonzero reference count
             on the subject module, until staprun exits, by which time
             the kernel module will have inserted its separate claws
             into the probeworthy modules.  This prevents a race
             condition where a probe may be just starting up at the
             same time that a probeworthy module is being unloaded. */
        }
    }

  globfree (& globbuf);
  return 0;
}
808 | ||
809 | ||
810 | ||
/*
 * Send all relocation data: adjust kptr_restrict first, then the
 * kernel's own relocations and, only if those went through, the
 * per-module ones.  Returns 0 on success or the first nonzero result.
 */
int send_relocations ()
{
  int status;

  tweak_kptr_restrict();

  status = send_relocation_kernel ();
  if (status != 0)
    return status;

  return send_relocation_modules ();
}
d62c7736 FCE |
822 | |
823 | ||
933e53b0 | 824 | int send_tzinfo () |
d62c7736 | 825 | { |
d62c7736 FCE |
826 | struct _stp_msg_tzinfo tzi; |
827 | time_t now_t; | |
828 | struct tm* now; | |
933e53b0 | 829 | int rc; |
d62c7736 | 830 | |
d62c7736 FCE |
831 | /* NB: This is not good enough; it sends DST-unaware numbers. */ |
832 | #if 0 | |
833 | tzset (); | |
834 | tzi.tz_gmtoff = timezone; | |
d6c9d87f | 835 | strncpy (tzi.tz_name, tzname[0], STP_TZ_NAME_LEN - 1); |
d62c7736 FCE |
836 | #endif |
837 | ||
838 | time (& now_t); | |
839 | now = localtime (& now_t); | |
840 | tzi.tz_gmtoff = - now->tm_gmtoff; | |
d6c9d87f | 841 | strncpy (tzi.tz_name, now->tm_zone, STP_TZ_NAME_LEN - 1); |
d62c7736 | 842 | |
933e53b0 DB |
843 | rc = send_request(STP_TZINFO, & tzi, sizeof(tzi)); |
844 | if (rc != 0) | |
845 | perror ("Unable to send time zone information"); | |
846 | return rc; | |
5ffdc7b0 DB |
847 | } |
848 | ||
429a4963 | 849 | int send_privilege_credentials (privilege_t user_credentials) |
5ffdc7b0 DB |
850 | { |
851 | struct _stp_msg_privilege_credentials pc; | |
933e53b0 | 852 | int rc; |
429a4963 | 853 | pc.pc_group_mask = user_credentials; |
933e53b0 DB |
854 | rc = send_request(STP_PRIVILEGE_CREDENTIALS, & pc, sizeof(pc)); |
855 | if (rc != 0) { | |
856 | /* Not an error. Happens when pre 1.7 modules are loaded. */ | |
adc149b7 | 857 | dbug (1, "Unable to send user privilege credentials\n"); |
933e53b0 DB |
858 | } |
859 | return rc; | |
d62c7736 | 860 | } |
5137a7a9 | 861 | |
933e53b0 | 862 | int send_remote_id () |
5137a7a9 FCE |
863 | { |
864 | struct _stp_msg_remote_id rem; | |
933e53b0 | 865 | int rc; |
5137a7a9 FCE |
866 | |
867 | rem.remote_id = remote_id; | |
d6c9d87f | 868 | strncpy (rem.remote_uri, remote_uri, STP_REMOTE_URI_LEN - 1); |
5137a7a9 | 869 | rem.remote_uri [STP_REMOTE_URI_LEN-1]='\0'; /* XXX: quietly truncate */ |
933e53b0 DB |
870 | rc = send_request(STP_REMOTE_ID, & rem, sizeof(rem)); |
871 | if (rc != 0) | |
872 | perror ("Unable to send remote id"); | |
873 | return rc; | |
5137a7a9 | 874 | } |