1 | /* -*- linux-c -*- |
2 | * uprobe Functions | |
ef36f781 | 3 | * Copyright (C) 2014 Red Hat Inc. |
cc52276b WC |
4 | * |
5 | * This file is part of systemtap, and is free software. You can | |
6 | * redistribute it and/or modify it under the terms of the GNU General | |
7 | * Public License (GPL); either version 2, or (at your option) any | |
8 | * later version. | |
9 | */ | |
10 | ||
11 | #ifndef _UPROBE_COMMON_C_ | |
12 | #define _UPROBE_COMMON_C_ | |
13 | ||
14 | /* NB: Because these utrace callbacks only occur before / after | |
15 | userspace instructions run, there is no concurrency control issue | |
16 | between active uprobe callbacks and these registration / | |
17 | unregistration pieces. | |
18 | ||
19 | We protect the stap_uprobe->spec_index (which also serves as a | |
20 | free/busy flag) value with the outer protective stap_probes_lock | |
21 | spinlock, to protect it against concurrent registration / | |
22 | unregistration. | |
23 | */ | |
24 | ||
/* Task-finder "+" callback worker: a mapping (or whole ET_EXEC image) for
   task TSK has appeared.  For every stap_uprobe_spec owned by the
   triggering stap_uprobe_tf STF, claim a slot in stap_uprobes[] and
   register a u(ret)probe at relocation+sups->address.  RELOCATION, OFFSET
   and LENGTH describe the new mapping; VM_FLAGS are its protection bits.
   This is pass one for SDT semaphores (address computation only); the
   actual increment happens in stap_uprobe_change_semaphore_plus.
   Always returns 0 except on _stp_usermodule_check failure. */
static int stap_uprobe_change_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf, unsigned long offset, unsigned long vm_flags) {
  int tfi = (stf - stap_uprobe_finders);  /* index of STF within stap_uprobe_finders[] */
  int spec_index;
  /* iterate over stap_uprobe_spec[] that use this same stap_uprobe_tf */
  for (spec_index=0; spec_index<sizeof(stap_uprobe_specs)/sizeof(stap_uprobe_specs[0]); spec_index++) {
    int handled_p = 0;   /* probe successfully registered (or already present) */
    int slotted_p = 0;   /* a stap_uprobes[] slot was claimed for this spec */
    const struct stap_uprobe_spec *sups = &stap_uprobe_specs [spec_index];
    struct stap_uprobe *sup;
    pid_t sdt_sem_pid;
    int rc = 0;
    int i;
    int pci;

    if (likely(sups->tfi != tfi)) continue;  /* spec belongs to a different finder */
    /* skip probes with an address beyond this map event; should not
       happen unless a shlib/exec got mmapped in weirdly piecemeal */
    if (likely((vm_flags & VM_EXEC) && sups->address >= length)) continue;

    /* Found a uprobe_spec for this stap_uprobe_tf.  Need to lock the
       stap_uprobes[] array to allocate a free spot, but then we can
       unlock and do the register_*probe subsequently. */

    might_sleep();
    mutex_lock (& stap_uprobes_lock);
    for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
      sup = & stap_uprobes[i];

      /* register new uprobe
         We make two passes for semaphores;
         see stap_uprobe_change_semaphore_plus */

      /* A slot matches if it is free (spec_index < 0), or if this is the
         writable-segment pass for a spec whose semaphore still needs its
         address recomputed. */
      if (sup->spec_index < 0 || (sups->sdt_sem_offset && vm_flags & VM_WRITE && sup->spec_index == spec_index)) {
#if (UPROBES_API_VERSION < 2)
        /* See PR6829 comment. */
        if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;
        else if (sup->spec_index == -2 && sup->urp.u.kdata != NULL) continue;
#endif
        sup->spec_index = spec_index;  /* claim the slot (busy marker) */
        slotted_p = 1;
        break;
      }
    }
    mutex_unlock (& stap_uprobes_lock);
#ifdef DEBUG_UPROBES
    _stp_dbug(__FUNCTION__,__LINE__, "+uprobe spec %d idx %d process %s[%d] addr %p pp %s\n", spec_index, (slotted_p ? i : -1), tsk->comm, tsk->tgid, (void*)(relocation+sups->address), sups->probe->pp);
#endif

    /* NB: check for user-module build-id only if we have a pathname
       at all; for a process(PID#).* probe, we may not.  If at some
       point we map process(PID#) to process("/proc/PID#/exe"), we'll
       get a pathname. */
    if (stf->pathname)
      if ((rc = _stp_usermodule_check(tsk, stf->pathname, relocation)))
        /* NOTE(review): returning here leaves the slot just claimed above
           (sup->spec_index = spec_index) still allocated — looks like a
           slot leak on build-id mismatch; confirm intent. */
        return rc;

    /* Here, slotted_p implies that `i' points to the single
       stap_uprobes[] element that has been slotted in for registration
       or unregistration processing.  !slotted_p implies that the table
       was full (registration; MAXUPROBES) or that no matching entry was
       found (unregistration; should not happen). */

    /* NOTE(review): sup is dereferenced here even when !slotted_p, in
       which case it points at the last stap_uprobes[] entry scanned —
       verify this cannot corrupt a busy slot's sdt_sem_address. */
    sdt_sem_pid = (sups->return_p ? sup->urp.u.pid : sup->up.pid);
    if (sups->sdt_sem_offset && (sdt_sem_pid != tsk->tgid || sup->sdt_sem_address == 0)) {
      /* If the probe is in an ET_EXEC binary, then the sdt_sem_offset already
       * is a real address.  But stap_uprobe_process_found calls us in this
       * case with relocation=offset=0, so we don't have to worry about it. */
      sup->sdt_sem_address = (relocation - offset) + sups->sdt_sem_offset;
    } /* sdt_sem_offset */

    /* Prime any perf counters this spec reads. */
    for (pci=0; pci < sups->perf_counters_dim; pci++) {
      if ((sups->perf_counters)[pci] > -1)
        _stp_perf_read_init ((sups->perf_counters)[pci], tsk);
    }

    if (slotted_p) {
      struct stap_uprobe *sup = & stap_uprobes[i];
      if (sups->return_p) {
        sup->urp.u.pid = tsk->tgid;
        sup->urp.u.vaddr = relocation + sups->address;
        sup->urp.handler = &enter_uretprobe_probe;
        rc = register_uretprobe (& sup->urp);
      } else {
        sup->up.pid = tsk->tgid;
        sup->up.vaddr = relocation + sups->address;
        sup->up.handler = &enter_uprobe_probe;
        rc = register_uprobe (& sup->up);
      }

      /* The u*probe failed to register.  However, if we got EEXIST,
       * that means that the u*probe is already there, so just ignore
       * the error.  This could happen if CLONE_THREAD or CLONE_VM was
       * used. */
      if (rc != 0 && rc != -EEXIST) {
        _stp_warn ("u*probe failed %s[%d] '%s' addr %p rc %d\n", tsk->comm, tsk->tgid, sups->probe->pp, (void*)(relocation + sups->address), rc);
        /* NB: we need to release this slot,
           so we need to borrow the mutex temporarily. */
        might_sleep();
        mutex_lock (& stap_uprobes_lock);
        sup->spec_index = -1;      /* mark slot free again */
        sup->sdt_sem_address = 0;
        mutex_unlock (& stap_uprobes_lock);
      } else {
        handled_p = 1;
      }
    }
    /* NB: handled_p implies slotted_p */
    if (unlikely (! handled_p)) {
#ifdef STP_TIMING
      atomic_inc (skipped_count_uprobe_reg());
#endif
      /* NB: duplicates common_entryfn_epilogue,
         but then this is not a probe entry fn epilogue. */
#ifndef STAP_SUPPRESS_HANDLER_ERRORS
      if (unlikely (atomic_inc_return (skipped_count()) > MAXSKIPPED)) {
        if (unlikely (pseudo_atomic_cmpxchg(session_state(), STAP_SESSION_RUNNING, STAP_SESSION_ERROR) == STAP_SESSION_RUNNING))
          _stp_error ("Skipped too many probes, check MAXSKIPPED or try again with stap -t for more details.");
      }
#endif
    }
  } /* close iteration over stap_uprobe_spec[] */
  return 0; /* XXX: or rc? */
}
148 | ||
/* Second pass for SDT semaphores: called once the writable segment of a
   module is mapped into task TSK at [relocation, relocation+length).
   Increments each user-space semaphore variable whose address (computed
   in stap_uprobe_change_plus) falls inside that range.  Returns 0, or
   the last get_user/put_user fault code. */
static int stap_uprobe_change_semaphore_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int tfi = (stf - stap_uprobe_finders);
  int spec_index;
  int rc = 0;
  struct stap_uprobe *sup;
  int i;

  /* We make two passes for semaphores.
     The first pass, stap_uprobe_change_plus, calculates the address of the
     semaphore.  If the probe is in a .so, we calculate the
     address when the initial mmap maps the entire solib, e.g.
     7f089885a000-7f089885b000  rw-p-  libtcl.so
     A subsequent mmap maps in the writable segment where the
     semaphore control variable lives, e.g.
     7f089850d000-7f0898647000  r-xp-  libtcl.so
     7f0898647000-7f0898846000  ---p   libtcl.so
     7f0898846000-7f089885b000  rw-p-  libtcl.so
     The second pass, stap_uprobe_change_semaphore_plus, sets the semaphore.
     If the probe is in a .so this will be when the writable segment of the .so
     is mapped in.  If the task changes, then recalculate the address.
  */

  for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
    sup = & stap_uprobes[i];
    if (sup->spec_index == -1) continue;  /* free slot */
    /* Skip entries whose semaphore is for another task or outside this mapping. */
    if (sup->sdt_sem_address != 0 && !(sup->up.pid == tsk->tgid && sup->sdt_sem_address >= relocation && sup->sdt_sem_address < relocation+length)) continue;
    if (sup->sdt_sem_address) {
      unsigned short sdt_semaphore = 0; /* NB: fixed size */
      /* Read-modify-write the semaphore in the probed task's memory. */
      if ((rc = get_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address)) == 0) {
        sdt_semaphore ++;
#ifdef DEBUG_UPROBES
        {
          const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];
          _stp_dbug(__FUNCTION__,__LINE__, "+semaphore %#x @ %#lx spec %d idx %d task %d\n", sdt_semaphore, sup->sdt_sem_address, sup->spec_index, i, tsk->tgid);
        }
#endif
        rc = put_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address);
        /* XXX: need to analyze possibility of race condition */
      }
    }
  }
  return rc;
}
192 | ||
/* Removing/unmapping a uprobe is simpler than adding one (in the
   _plus function above).  We need not care about stap_uprobe_finders
   or anything, we just scan through stap_uprobes[] for a live probe
   within the given address range, and kill it. */
/* Unregister every live u(ret)probe of task TSK whose vaddr lies in
   [relocation, relocation+length).  STF is unused here beyond matching
   the callback signature.  Always returns 0. */
static int stap_uprobe_change_minus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int i;

  /* NB: it's not an error for us not to find a live uprobe within the
     given range.  We might have received a callback for a part of a
     shlib that was unmapped and unprobed. */

  for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
    struct stap_uprobe *sup = & stap_uprobes[i];
    struct stap_uprobe_spec *sups;
    if (sup->spec_index < 0) continue; /* skip free uprobes slot */
    sups = (struct stap_uprobe_spec*) & stap_uprobe_specs[sup->spec_index];
    /* NB: the mutex is taken and released per slot, not around the
       whole scan. */
    might_sleep();
    mutex_lock (& stap_uprobes_lock);

    /* PR6829, PR9940:
       Here we're unregistering for one of two reasons:
       1. the process image is going away (or gone) due to exit or exec; or
       2. the vma containing the probepoint has been unmapped.
       In case 1, it's sort of a nop, because uprobes will notice the event
       and dispose of the probes eventually, if it hasn't already.  But by
       calling unmap_u[ret]probe() ourselves, we free up sup right away.

       In both cases, we must use unmap_u[ret]probe instead of
       unregister_u[ret]probe, so uprobes knows not to try to restore the
       original opcode.
    */

    /* URETPROBE */
    if (sups->return_p && sup->urp.u.pid == tsk->tgid && sup->urp.u.vaddr >= relocation && sup->urp.u.vaddr < relocation+length) { /* in range */

#ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uretprobe spec %d idx %d process %s[%d] addr %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->urp.u.vaddr, sups->probe->pp);
#endif
#if (UPROBES_API_VERSION >= 2)
      unmap_uretprobe (& sup->urp);
      sup->spec_index = -1;      /* slot immediately reusable */
      sup->sdt_sem_address = 0;
#else
      /* Uprobes lacks unmap_uretprobe.  Before reusing sup, we must wait
         until uprobes turns loose of the uretprobe on its own, as indicated
         by uretprobe.kdata = NULL. */
      sup->spec_index = -2;      /* pending-release marker; see PR6829 check in _plus */
#endif
      /* UPROBE */
    } else if (!sups->return_p && sup->up.pid == tsk->tgid && sup->up.vaddr >= relocation && sup->up.vaddr < relocation+length) { /* in range */

#ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uprobe spec %d idx %d process %s[%d] reloc %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->up.vaddr, sups->probe->pp);
#endif
#if (UPROBES_API_VERSION >= 2)
      unmap_uprobe (& sup->up);
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
#else
      /* Uprobes lacks unmap_uprobe.  Before reusing sup, we must wait
         until uprobes turns loose of the uprobe on its own, as indicated
         by uprobe.kdata = NULL. */
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
#endif
      /* PR10655: we don't need to fidget with the ENABLED semaphore either,
         as the process is gone, buh-bye, toodaloo, au revoir, see ya later! */
    }
    mutex_unlock (& stap_uprobes_lock);
  } /* close iteration over stap_uprobes[] */
  return 0; /* XXX: or !handled_p */
}
265 | ||
266 | /* The task_finder_callback we use for ET_EXEC targets. | |
267 | We used to perform uprobe insertion/removal here, but not any more. | |
268 | (PR10524) */ | |
269 | static int stap_uprobe_process_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) { | |
270 | const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder); | |
271 | if (! process_p) return 0; /* ignore threads */ | |
a2b0b5c8 | 272 | dbug_task_vma(1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname); |
cc52276b WC |
273 | /* ET_EXEC events are like shlib events, but with 0 relocation bases */ |
274 | if (register_p) { | |
275 | int rc = stap_uprobe_change_plus (tsk, 0, TASK_SIZE, stf, 0, 0); | |
276 | stap_uprobe_change_semaphore_plus (tsk, 0, TASK_SIZE, stf); | |
277 | return rc; | |
278 | } else | |
279 | return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf); | |
280 | } | |
281 | ||
282 | /* The task_finder_mmap_callback */ | |
7b9215b2 JS |
283 | static int |
284 | stap_uprobe_mmap_found (struct stap_task_finder_target *tgt, | |
285 | struct task_struct *tsk, char *path, | |
286 | struct dentry *dentry, unsigned long addr, | |
287 | unsigned long length, unsigned long offset, | |
288 | unsigned long vm_flags) | |
289 | { | |
290 | int rc = 0; | |
cc52276b WC |
291 | const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder); |
292 | /* 1 - shared libraries' executable segments load from offset 0 | |
7b9215b2 JS |
293 | * - ld.so convention offset != 0 is now allowed |
294 | * so stap_uprobe_change_plus can set a semaphore, | |
295 | * i.e. a static extern, in a shared object | |
296 | * 2 - the shared library we're interested in | |
100a540e | 297 | * 3 - mapping should be executable or writable (for semaphore in .so) |
7b9215b2 JS |
298 | * NB: or both, on kernels that lack noexec mapping |
299 | */ | |
300 | if (path == NULL || strcmp (path, stf->pathname)) | |
301 | return 0; | |
302 | ||
d2696bba DS |
303 | /* Check non-writable, executable sections for probes. */ |
304 | if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE)) { | |
a2b0b5c8 | 305 | dbug_task_vma (1, |
7b9215b2 JS |
306 | "+mmap X pid %d path %s addr %p length %u offset %p stf %p %p path %s\n", |
307 | tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset, | |
308 | tgt, stf, stf->pathname); | |
7b9215b2 JS |
309 | rc = stap_uprobe_change_plus (tsk, addr, length, stf, offset, vm_flags); |
310 | } | |
311 | ||
100a540e | 312 | /* Check writable sections for semaphores. |
7b9215b2 JS |
313 | * NB: They may have also been executable for the check above, if we're |
314 | * running a kernel that lacks noexec mappings. So long as there's | |
315 | * no error (rc == 0), we need to look for semaphores too. | |
316 | */ | |
317 | if ((rc == 0) && (vm_flags & VM_WRITE)) { | |
a2b0b5c8 | 318 | dbug_task_vma (1, |
7b9215b2 JS |
319 | "+mmap W pid %d path %s addr %p length %u offset %p stf %p %p path %s\n", |
320 | tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset, | |
321 | tgt, stf, stf->pathname); | |
7b9215b2 JS |
322 | rc = stap_uprobe_change_semaphore_plus (tsk, addr, length, stf); |
323 | } | |
324 | ||
325 | return rc; | |
cc52276b WC |
326 | } |
327 | ||
328 | /* The task_finder_munmap_callback */ | |
329 | static int stap_uprobe_munmap_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, unsigned long addr, unsigned long length) { | |
330 | const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder); | |
a2b0b5c8 | 331 | dbug_task_vma (1, "-mmap pid %d addr %p length %lu stf %p %p path %s\n", tsk->tgid, (void *) addr, length, tgt, stf, stf->pathname); |
cc52276b WC |
332 | return stap_uprobe_change_minus (tsk, addr, length, stf); |
333 | } | |
334 | ||
19d91f6c JS |
335 | /* The task_finder_callback we use for ET_DYN targets. |
336 | This just forces an unmap of everything as the process exits. | |
337 | (PR11151) */ | |
338 | static int stap_uprobe_process_munmap (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) { | |
339 | const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder); | |
340 | if (! process_p) return 0; /* ignore threads */ | |
a2b0b5c8 | 341 | dbug_task_vma (1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname); |
19d91f6c JS |
342 | /* Covering 0->TASK_SIZE means "unmap everything" */ |
343 | if (!register_p) | |
344 | return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf); | |
345 | return 0; | |
346 | } | |
347 | ||
cc52276b | 348 | #endif /* _UPROBE_COMMON_C_ */ |