/* -*- linux-c -*-
 * uprobe Functions
 * Copyright (C) 2014 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software. You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#ifndef _UPROBE_COMMON_C_
#define _UPROBE_COMMON_C_

/* NB: Because these utrace callbacks only occur before / after
   userspace instructions run, there is no concurrency control issue
   between active uprobe callbacks and these registration /
   unregistration pieces.

   We protect the stap_uprobe->spec_index value (which also serves as a
   free/busy flag) with the outer stap_uprobes_lock mutex, guarding it
   against concurrent registration / unregistration.
*/
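
/* Slot-state convention for stap_uprobes[].spec_index, as used below:
   >= 0 : busy, indexing the matching stap_uprobe_specs[] entry;
   -1   : free (with UPROBES_API_VERSION < 2, a just-unmapped uprobe slot
          additionally waits for up.kdata to become NULL before reuse);
   -2   : with UPROBES_API_VERSION < 2 only, a uretprobe slot waiting for
          urp.u.kdata to become NULL before it can be reused. */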

static int stap_uprobe_change_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf, unsigned long offset, unsigned long vm_flags) {
  int tfi = (stf - stap_uprobe_finders);
  int spec_index;
  /* iterate over stap_uprobe_spec[] that use this same stap_uprobe_tf */
  for (spec_index=0; spec_index<sizeof(stap_uprobe_specs)/sizeof(stap_uprobe_specs[0]); spec_index++) {
    int handled_p = 0;
    int slotted_p = 0;
    const struct stap_uprobe_spec *sups = &stap_uprobe_specs [spec_index];
    struct stap_uprobe *sup;
    pid_t sdt_sem_pid;
    int rc = 0;
    int i;
    int pci;

    if (likely(sups->tfi != tfi)) continue;
    /* skip probes with an address beyond this map event; should not
       happen unless a shlib/exec got mmapped in weirdly piecemeal */
    if (likely((vm_flags & VM_EXEC) && sups->address >= length)) continue;

    /* Found a uprobe_spec for this stap_uprobe_tf. Need to lock the
       stap_uprobes[] array to allocate a free spot, but then we can
       unlock and do the register_*probe subsequently. */

    might_sleep();
    mutex_lock (& stap_uprobes_lock);
    for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
      sup = & stap_uprobes[i];

      /* register new uprobe
         We make two passes for semaphores;
         see stap_uprobe_change_semaphore_plus */

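      /* Claim this slot either because it is free (spec_index < 0), or
         because this spec has an SDT semaphore and already owns the slot
         from an earlier executable mapping: the later VM_WRITE mapping
         revisits it so the semaphore address can be computed below. */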
      if (sup->spec_index < 0 || (sups->sdt_sem_offset && vm_flags & VM_WRITE && sup->spec_index == spec_index)) {
        #if (UPROBES_API_VERSION < 2)
        /* See PR6829 comment. */
        if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;
        else if (sup->spec_index == -2 && sup->urp.u.kdata != NULL) continue;
        #endif
        sup->spec_index = spec_index;
        slotted_p = 1;
        break;
      }
    }
    mutex_unlock (& stap_uprobes_lock);
    #ifdef DEBUG_UPROBES
    _stp_dbug(__FUNCTION__,__LINE__, "+uprobe spec %d idx %d process %s[%d] addr %p pp %s\n", spec_index, (slotted_p ? i : -1), tsk->comm, tsk->tgid, (void*)(relocation+sups->address), sups->probe->pp);
    #endif

    /* NB: check for user-module build-id only if we have a pathname
       at all; for a process(PID#).* probe, we may not.  If at some
       point we map process(PID#) to process("/proc/PID#/exe"), we'll
       get a pathname. */
    if (stf->pathname)
      if ((rc = _stp_usermodule_check(tsk, stf->pathname, relocation)))
        return rc;

    /* Here, slotted_p implies that `i' points to the single
       stap_uprobes[] element that has been slotted in for registration
       or unregistration processing.  !slotted_p implies that the table
       was full (registration; MAXUPROBES) or that no matching entry was
       found (unregistration; should not happen). */

    sdt_sem_pid = (sups->return_p ? sup->urp.u.pid : sup->up.pid);
    if (sups->sdt_sem_offset && (sdt_sem_pid != tsk->tgid || sup->sdt_sem_address == 0)) {
      /* If the probe is in an ET_EXEC binary, then the sdt_sem_offset already
       * is a real address.  But stap_uprobe_process_found calls us in this
       * case with relocation=offset=0, so we don't have to worry about it. */
      sup->sdt_sem_address = (relocation - offset) + sups->sdt_sem_offset;
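      /* The arithmetic, with illustrative (made-up) values: if the writable
         segment is mapped at relocation 0x7f0898846000 from file offset
         0x33c000, a link-time sdt_sem_offset of 0x33c010 gives
         0x7f0898846000 - 0x33c000 + 0x33c010 = 0x7f0898846010. */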
    } /* sdt_sem_offset */

    for (pci=0; pci < sups->perf_counters_dim; pci++) {
      if ((sups->perf_counters)[pci] > -1)
        _stp_perf_read_init ((sups->perf_counters)[pci], tsk);
    }

    if (slotted_p) {
      struct stap_uprobe *sup = & stap_uprobes[i];
      if (sups->return_p) {
        sup->urp.u.pid = tsk->tgid;
        sup->urp.u.vaddr = relocation + sups->address;
        sup->urp.handler = &enter_uretprobe_probe;
        rc = register_uretprobe (& sup->urp);
      } else {
        sup->up.pid = tsk->tgid;
        sup->up.vaddr = relocation + sups->address;
        sup->up.handler = &enter_uprobe_probe;
        rc = register_uprobe (& sup->up);
      }

      /* The u*probe failed to register.  However, if we got EEXIST,
       * that means that the u*probe is already there, so just ignore
       * the error.  This could happen if CLONE_THREAD or CLONE_VM was
       * used. */
      if (rc != 0 && rc != -EEXIST) {
        _stp_warn ("u*probe failed %s[%d] '%s' addr %p rc %d\n", tsk->comm, tsk->tgid, sups->probe->pp, (void*)(relocation + sups->address), rc);
        /* NB: we need to release this slot,
           so we need to borrow the mutex temporarily. */
        might_sleep();
        mutex_lock (& stap_uprobes_lock);
        sup->spec_index = -1;
        sup->sdt_sem_address = 0;
        mutex_unlock (& stap_uprobes_lock);
      } else {
        handled_p = 1;
      }
    }
    /* NB: handled_p implies slotted_p */
    if (unlikely (! handled_p)) {
      #ifdef STP_TIMING
      atomic_inc (skipped_count_uprobe_reg());
      #endif
      /* NB: duplicates common_entryfn_epilogue,
         but then this is not a probe entry fn epilogue. */
#ifndef STAP_SUPPRESS_HANDLER_ERRORS
      if (unlikely (atomic_inc_return (skipped_count()) > MAXSKIPPED)) {
        if (unlikely (pseudo_atomic_cmpxchg(session_state(), STAP_SESSION_RUNNING, STAP_SESSION_ERROR) == STAP_SESSION_RUNNING))
          _stp_error ("Skipped too many probes, check MAXSKIPPED or try again with stap -t for more details.");
      }
#endif
    }
  } /* close iteration over stap_uprobe_spec[] */
  return 0; /* XXX: or rc? */
}

static int stap_uprobe_change_semaphore_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int tfi = (stf - stap_uprobe_finders);
  int spec_index;
  int rc = 0;
  struct stap_uprobe *sup;
  int i;

  /* We make two passes for semaphores.
     The first pass, stap_uprobe_change_plus, calculates the address of the
     semaphore.  If the probe is in a .so, we calculate the
     address when the initial mmap maps the entire solib, e.g.
       7f089885a000-7f089885b000  rw-p  libtcl.so
     A subsequent mmap maps in the writable segment where the
     semaphore control variable lives, e.g.
       7f089850d000-7f0898647000  r-xp  libtcl.so
       7f0898647000-7f0898846000  ---p  libtcl.so
       7f0898846000-7f089885b000  rw-p  libtcl.so
     The second pass, stap_uprobe_change_semaphore_plus, sets the semaphore.
     If the probe is in a .so this will be when the writable segment of the .so
     is mapped in.  If the task changes, then recalculate the address.
  */

  for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
    sup = & stap_uprobes[i];
    if (sup->spec_index == -1) continue;
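    /* Skip entries whose semaphore address is already known but belongs to
       another process or lies outside the range just mapped; only semaphores
       within [relocation, relocation+length) of this task are updated. */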
    if (sup->sdt_sem_address != 0 && !(sup->up.pid == tsk->tgid && sup->sdt_sem_address >= relocation && sup->sdt_sem_address < relocation+length)) continue;
    if (sup->sdt_sem_address) {
      unsigned short sdt_semaphore = 0; /* NB: fixed size */
      if ((rc = get_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address)) == 0) {
        sdt_semaphore ++;
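        /* The <sys/sdt.h>-style semaphore is a 16-bit counter in the target
           process's writable data; the probed program typically prepares
           probe arguments only while the counter is nonzero, so incrementing
           it here enables the marker for this consumer. */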
        #ifdef DEBUG_UPROBES
        {
          const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];
          _stp_dbug(__FUNCTION__,__LINE__, "+semaphore %#x @ %#lx spec %d idx %d task %d\n", sdt_semaphore, sup->sdt_sem_address, sup->spec_index, i, tsk->tgid);
        }
        #endif
        rc = put_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address);
        /* XXX: need to analyze possibility of race condition */
      }
    }
  }
  return rc;
}

/* Removing/unmapping a uprobe is simpler than adding one (in the
   _plus function above).  We need not care about stap_uprobe_finders
   or anything, we just scan through stap_uprobes[] for a live probe
   within the given address range, and kill it. */
static int stap_uprobe_change_minus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int i;

  /* NB: it's not an error for us not to find a live uprobe within the
     given range.  We might have received a callback for a part of a
     shlib that was unmapped and unprobed. */

  for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
    struct stap_uprobe *sup = & stap_uprobes[i];
    struct stap_uprobe_spec *sups;
    if (sup->spec_index < 0) continue; /* skip free uprobes slot */
    sups = (struct stap_uprobe_spec*) & stap_uprobe_specs[sup->spec_index];
    might_sleep();
    mutex_lock (& stap_uprobes_lock);

    /* PR6829, PR9940:
       Here we're unregistering for one of two reasons:
       1. the process image is going away (or gone) due to exit or exec; or
       2. the vma containing the probepoint has been unmapped.
       In case 1, it's sort of a nop, because uprobes will notice the event
       and dispose of the probes eventually, if it hasn't already.  But by
       calling unmap_u[ret]probe() ourselves, we free up sup right away.

       In both cases, we must use unmap_u[ret]probe instead of
       unregister_u[ret]probe, so uprobes knows not to try to restore the
       original opcode.
    */

    /* URETPROBE */
    if (sups->return_p && sup->urp.u.pid == tsk->tgid && sup->urp.u.vaddr >= relocation && sup->urp.u.vaddr < relocation+length) { /* in range */

      #ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uretprobe spec %d idx %d process %s[%d] addr %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->urp.u.vaddr, sups->probe->pp);
      #endif
      #if (UPROBES_API_VERSION >= 2)
      unmap_uretprobe (& sup->urp);
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
      #else
      /* Uprobes lacks unmap_uretprobe.  Before reusing sup, we must wait
         until uprobes turns loose of the uretprobe on its own, as indicated
         by uretprobe.kdata = NULL. */
      sup->spec_index = -2;
      #endif
    /* UPROBE */
    } else if (!sups->return_p && sup->up.pid == tsk->tgid && sup->up.vaddr >= relocation && sup->up.vaddr < relocation+length) { /* in range */

      #ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uprobe spec %d idx %d process %s[%d] reloc %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->up.vaddr, sups->probe->pp);
      #endif
      #if (UPROBES_API_VERSION >= 2)
      unmap_uprobe (& sup->up);
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
      #else
      /* Uprobes lacks unmap_uprobe.  Before reusing sup, we must wait
         until uprobes turns loose of the uprobe on its own, as indicated
         by uprobe.kdata = NULL. */
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
      #endif
      /* PR10655: we don't need to fidget with the ENABLED semaphore either,
         as the process is gone, buh-bye, toodaloo, au revoir, see ya later! */
    }
    mutex_unlock (& stap_uprobes_lock);
  } /* close iteration over stap_uprobes[] */
  return 0; /* XXX: or !handled_p */
}

/* The task_finder_callback we use for ET_EXEC targets.
   We used to perform uprobe insertion/removal here, but not any more.
   (PR10524) */
static int stap_uprobe_process_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  if (! process_p) return 0; /* ignore threads */
  dbug_task_vma(1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
  /* ET_EXEC events are like shlib events, but with 0 relocation bases */
  if (register_p) {
    int rc = stap_uprobe_change_plus (tsk, 0, TASK_SIZE, stf, 0, 0);
    stap_uprobe_change_semaphore_plus (tsk, 0, TASK_SIZE, stf);
    return rc;
  } else
    return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
}

/* The task_finder_mmap_callback */
static int
stap_uprobe_mmap_found (struct stap_task_finder_target *tgt,
                        struct task_struct *tsk, char *path,
                        struct dentry *dentry, unsigned long addr,
                        unsigned long length, unsigned long offset,
                        unsigned long vm_flags)
{
  int rc = 0;
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  /* 1 - shared libraries' executable segments load from offset 0
   *   - ld.so convention offset != 0 is now allowed
   *     so stap_uprobe_change_plus can set a semaphore,
   *     i.e. a static extern, in a shared object
   * 2 - the shared library we're interested in
   * 3 - mapping should be executable or writable (for semaphore in .so)
   *     NB: or both, on kernels that lack noexec mapping
   */
  if (path == NULL || strcmp (path, stf->pathname))
    return 0;

  /* Check non-writable, executable sections for probes. */
  if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE)) {
    dbug_task_vma (1,
                   "+mmap X pid %d path %s addr %p length %u offset %p stf %p %p path %s\n",
                   tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset,
                   tgt, stf, stf->pathname);
    rc = stap_uprobe_change_plus (tsk, addr, length, stf, offset, vm_flags);
  }

  /* Check writable sections for semaphores.
   * NB: They may have also been executable for the check above, if we're
   *     running a kernel that lacks noexec mappings.  So long as there's
   *     no error (rc == 0), we need to look for semaphores too.
   */
  if ((rc == 0) && (vm_flags & VM_WRITE)) {
    dbug_task_vma (1,
                   "+mmap W pid %d path %s addr %p length %u offset %p stf %p %p path %s\n",
                   tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset,
                   tgt, stf, stf->pathname);
    rc = stap_uprobe_change_semaphore_plus (tsk, addr, length, stf);
  }

  return rc;
}

/* The task_finder_munmap_callback */
static int stap_uprobe_munmap_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, unsigned long addr, unsigned long length) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  dbug_task_vma (1, "-mmap pid %d addr %p length %lu stf %p %p path %s\n", tsk->tgid, (void *) addr, length, tgt, stf, stf->pathname);
  return stap_uprobe_change_minus (tsk, addr, length, stf);
}

/* The task_finder_callback we use for ET_DYN targets.
   This just forces an unmap of everything as the process exits.
   (PR11151) */
static int stap_uprobe_process_munmap (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  if (! process_p) return 0; /* ignore threads */
  dbug_task_vma (1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
  /* Covering 0->TASK_SIZE means "unmap everything" */
  if (!register_p)
    return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
  return 0;
}

#endif /* _UPROBE_COMMON_C_ */