Attachment 'linux-kernel-markers-support-multiple-probes.patch'
Download 1 From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
2
3 RCU style multiple probes support for the Linux Kernel Markers. Common case
4 (one probe) is still fast and does not require dynamic allocation or a
5 supplementary pointer dereference on the fast path.
6
7 - Move preempt disable from the marker site to the callback.
8
9 Since we now have an internal callback, move the preempt disable/enable to the
10 callback instead of the marker site.
11
12 Since the callback change is done asynchronously (passing from a handler that
13 supports arguments to a handler that does not set up the arguments if no
14 arguments are passed), we can safely update it even if it is outside the
15 preempt disable section.
16
17 - Move probe arm to probe connection. Now, a connected probe is automatically
18 armed.
19
20 Remove MARK_MAX_FORMAT_LEN, unused.
21
22 This patch modifies the Linux Kernel Markers API : it removes the probe
23 "arm/disarm" and changes the probe function prototype : it now expects a
24 va_list * instead of a "...".
25
26 If we want to have more than one probe connected to a marker at a given
27 time (LTTng, blktrace, or SystemTap), then we need this patch. Without it,
28 connecting a second probe handler to a marker will fail.
29
30 It allows us, for instance, to do interesting combinations :
31
32 Do standard tracing with LTTng and, optionally, compute statistics
33 with SystemTap, or have a special trigger on an event that would call
34 a SystemTap script which would stop flight recorder tracing.
35
36 Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
37 Cc: Christoph Hellwig <hch@infradead.org>
38 Cc: Mike Mason <mmlnx@us.ibm.com>
39 Cc: Dipankar Sarma <dipankar@in.ibm.com>
40 Cc: David Smith <dsmith@redhat.com>
41 Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
42 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
43 ---
44
45 include/linux/marker.h | 59 +-
46 include/linux/module.h | 2
47 kernel/marker.c | 671 ++++++++++++++++++++++--------
48 kernel/module.c | 7
49 samples/markers/probe-example.c | 25 -
50 5 files changed, 548 insertions(+), 216 deletions(-)
51
52 diff -puN include/linux/marker.h~linux-kernel-markers-support-multiple-probes include/linux/marker.h
53 --- a/include/linux/marker.h~linux-kernel-markers-support-multiple-probes
54 +++ a/include/linux/marker.h
55 @@ -19,16 +19,23 @@ struct marker;
56
57 /**
58 * marker_probe_func - Type of a marker probe function
59 - * @mdata: pointer of type struct marker
60 - * @private_data: caller site private data
61 + * @probe_private: probe private data
62 + * @call_private: call site private data
63 * @fmt: format string
64 - * @...: variable argument list
65 + * @args: variable argument list pointer. Use a pointer to overcome C's
66 + * inability to pass this around as a pointer in a portable manner in
67 + * the callee otherwise.
68 *
69 * Type of marker probe functions. They receive the mdata and need to parse the
70 * format string to recover the variable argument list.
71 */
72 -typedef void marker_probe_func(const struct marker *mdata,
73 - void *private_data, const char *fmt, ...);
74 +typedef void marker_probe_func(void *probe_private, void *call_private,
75 + const char *fmt, va_list *args);
76 +
77 +struct marker_probe_closure {
78 + marker_probe_func *func; /* Callback */
79 + void *probe_private; /* Private probe data */
80 +};
81
82 struct marker {
83 const char *name; /* Marker name */
84 @@ -36,8 +43,11 @@ struct marker {
85 * variable argument list.
86 */
87 char state; /* Marker state. */
88 - marker_probe_func *call;/* Probe handler function pointer */
89 - void *private; /* Private probe data */
90 + char ptype; /* probe type : 0 : single, 1 : multi */
91 + void (*call)(const struct marker *mdata, /* Probe wrapper */
92 + void *call_private, const char *fmt, ...);
93 + struct marker_probe_closure single;
94 + struct marker_probe_closure *multi;
95 } __attribute__((aligned(8)));
96
97 #ifdef CONFIG_MARKERS
98 @@ -49,7 +59,7 @@ struct marker {
99 * not add unwanted padding between the beginning of the section and the
100 * structure. Force alignment to the same alignment as the section start.
101 */
102 -#define __trace_mark(name, call_data, format, args...) \
103 +#define __trace_mark(name, call_private, format, args...) \
104 do { \
105 static const char __mstrtab_name_##name[] \
106 __attribute__((section("__markers_strings"))) \
107 @@ -60,24 +70,23 @@ struct marker {
108 static struct marker __mark_##name \
109 __attribute__((section("__markers"), aligned(8))) = \
110 { __mstrtab_name_##name, __mstrtab_format_##name, \
111 - 0, __mark_empty_function, NULL }; \
112 + 0, 0, marker_probe_cb, \
113 + { __mark_empty_function, NULL}, NULL }; \
114 __mark_check_format(format, ## args); \
115 if (unlikely(__mark_##name.state)) { \
116 - preempt_disable(); \
117 (*__mark_##name.call) \
118 - (&__mark_##name, call_data, \
119 + (&__mark_##name, call_private, \
120 format, ## args); \
121 - preempt_enable(); \
122 } \
123 } while (0)
124
125 extern void marker_update_probe_range(struct marker *begin,
126 - struct marker *end, struct module *probe_module, int *refcount);
127 + struct marker *end);
128 #else /* !CONFIG_MARKERS */
129 -#define __trace_mark(name, call_data, format, args...) \
130 +#define __trace_mark(name, call_private, format, args...) \
131 __mark_check_format(format, ## args)
132 static inline void marker_update_probe_range(struct marker *begin,
133 - struct marker *end, struct module *probe_module, int *refcount)
134 + struct marker *end)
135 { }
136 #endif /* CONFIG_MARKERS */
137
138 @@ -92,8 +101,6 @@ static inline void marker_update_probe_r
139 #define trace_mark(name, format, args...) \
140 __trace_mark(name, NULL, format, ## args)
141
142 -#define MARK_MAX_FORMAT_LEN 1024
143 -
144 /**
145 * MARK_NOARGS - Format string for a marker with no argument.
146 */
147 @@ -106,24 +113,30 @@ static inline void __printf(1, 2) __mark
148
149 extern marker_probe_func __mark_empty_function;
150
151 +extern void marker_probe_cb(const struct marker *mdata,
152 + void *call_private, const char *fmt, ...);
153 +extern void marker_probe_cb_noarg(const struct marker *mdata,
154 + void *call_private, const char *fmt, ...);
155 +
156 /*
157 * Connect a probe to a marker.
158 * private data pointer must be a valid allocated memory address, or NULL.
159 */
160 extern int marker_probe_register(const char *name, const char *format,
161 - marker_probe_func *probe, void *private);
162 + marker_probe_func *probe, void *probe_private);
163
164 /*
165 * Returns the private data given to marker_probe_register.
166 */
167 -extern void *marker_probe_unregister(const char *name);
168 +extern int marker_probe_unregister(const char *name,
169 + marker_probe_func *probe, void *probe_private);
170 /*
171 * Unregister a marker by providing the registered private data.
172 */
173 -extern void *marker_probe_unregister_private_data(void *private);
174 +extern int marker_probe_unregister_private_data(marker_probe_func *probe,
175 + void *probe_private);
176
177 -extern int marker_arm(const char *name);
178 -extern int marker_disarm(const char *name);
179 -extern void *marker_get_private_data(const char *name);
180 +extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
181 + int num);
182
183 #endif
184 diff -puN include/linux/module.h~linux-kernel-markers-support-multiple-probes include/linux/module.h
185 --- a/include/linux/module.h~linux-kernel-markers-support-multiple-probes
186 +++ a/include/linux/module.h
187 @@ -462,7 +462,7 @@ int unregister_module_notifier(struct no
188
189 extern void print_modules(void);
190
191 -extern void module_update_markers(struct module *probe_module, int *refcount);
192 +extern void module_update_markers(void);
193
194 #else /* !CONFIG_MODULES... */
195 #define EXPORT_SYMBOL(sym)
196 diff -puN kernel/marker.c~linux-kernel-markers-support-multiple-probes kernel/marker.c
197 --- a/kernel/marker.c~linux-kernel-markers-support-multiple-probes
198 +++ a/kernel/marker.c
199 @@ -27,35 +27,41 @@
200 extern struct marker __start___markers[];
201 extern struct marker __stop___markers[];
202
203 +/* Set to 1 to enable marker debug output */
204 +const int marker_debug;
205 +
206 /*
207 * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
208 - * and module markers, the hash table and deferred_sync.
209 + * and module markers and the hash table.
210 */
211 static DEFINE_MUTEX(markers_mutex);
212
213 /*
214 - * Marker deferred synchronization.
215 - * Upon marker probe_unregister, we delay call to synchronize_sched() to
216 - * accelerate mass unregistration (only when there is no more reference to a
217 - * given module do we call synchronize_sched()). However, we need to make sure
218 - * every critical region has ended before we re-arm a marker that has been
219 - * unregistered and then registered back with a different probe data.
220 - */
221 -static int deferred_sync;
222 -
223 -/*
224 * Marker hash table, containing the active markers.
225 * Protected by module_mutex.
226 */
227 #define MARKER_HASH_BITS 6
228 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
229
230 +/*
231 + * Note about RCU :
232 + * It is used to make sure every handler has finished using its private data
233 + * between two consecutive operation (add or remove) on a given marker. It is
234 + * also used to delay the free of multiple probes array until a quiescent state
235 + * is reached.
236 + */
237 struct marker_entry {
238 struct hlist_node hlist;
239 char *format;
240 - marker_probe_func *probe;
241 - void *private;
242 + void (*call)(const struct marker *mdata, /* Probe wrapper */
243 + void *call_private, const char *fmt, ...);
244 + struct marker_probe_closure single;
245 + struct marker_probe_closure *multi;
246 int refcount; /* Number of times armed. 0 if disarmed. */
247 + struct rcu_head rcu;
248 + void *oldptr;
249 + char rcu_pending:1;
250 + char ptype:1;
251 char name[0]; /* Contains name'\0'format'\0' */
252 };
253
254 @@ -63,7 +69,8 @@ static struct hlist_head marker_table[MA
255
256 /**
257 * __mark_empty_function - Empty probe callback
258 - * @mdata: pointer of type const struct marker
259 + * @probe_private: probe private data
260 + * @call_private: call site private data
261 * @fmt: format string
262 * @...: variable argument list
263 *
264 @@ -72,13 +79,262 @@ static struct hlist_head marker_table[MA
265 * though the function pointer change and the marker enabling are two distinct
266 * operations that modifies the execution flow of preemptible code.
267 */
268 -void __mark_empty_function(const struct marker *mdata, void *private,
269 - const char *fmt, ...)
270 +void __mark_empty_function(void *probe_private, void *call_private,
271 + const char *fmt, va_list *args)
272 {
273 }
274 EXPORT_SYMBOL_GPL(__mark_empty_function);
275
276 /*
277 + * marker_probe_cb Callback that prepares the variable argument list for probes.
278 + * @mdata: pointer of type struct marker
279 + * @call_private: caller site private data
280 + * @fmt: format string
281 + * @...: Variable argument list.
282 + *
283 + * Since we do not use "typical" pointer based RCU in the 1 argument case, we
284 + * need to put a full smp_rmb() in this branch. This is why we do not use
285 + * rcu_dereference() for the pointer read.
286 + */
287 +void marker_probe_cb(const struct marker *mdata, void *call_private,
288 + const char *fmt, ...)
289 +{
290 + va_list args;
291 + char ptype;
292 +
293 + preempt_disable();
294 + ptype = ACCESS_ONCE(mdata->ptype);
295 + if (likely(!ptype)) {
296 + marker_probe_func *func;
297 + /* Must read the ptype before ptr. They are not data dependant,
298 + * so we put an explicit smp_rmb() here. */
299 + smp_rmb();
300 + func = ACCESS_ONCE(mdata->single.func);
301 + /* Must read the ptr before private data. They are not data
302 + * dependant, so we put an explicit smp_rmb() here. */
303 + smp_rmb();
304 + va_start(args, fmt);
305 + func(mdata->single.probe_private, call_private, fmt, &args);
306 + va_end(args);
307 + } else {
308 + struct marker_probe_closure *multi;
309 + int i;
310 + /*
311 + * multi points to an array, therefore accessing the array
312 + * depends on reading multi. However, even in this case,
313 + * we must insure that the pointer is read _before_ the array
314 + * data. Same as rcu_dereference, but we need a full smp_rmb()
315 + * in the fast path, so put the explicit barrier here.
316 + */
317 + smp_read_barrier_depends();
318 + multi = ACCESS_ONCE(mdata->multi);
319 + for (i = 0; multi[i].func; i++) {
320 + va_start(args, fmt);
321 + multi[i].func(multi[i].probe_private, call_private, fmt,
322 + &args);
323 + va_end(args);
324 + }
325 + }
326 + preempt_enable();
327 +}
328 +EXPORT_SYMBOL_GPL(marker_probe_cb);
329 +
330 +/*
331 + * marker_probe_cb_noarg Callback that does not prepare the variable argument list.
332 + * @mdata: pointer of type struct marker
333 + * @call_private: caller site private data
334 + * @fmt: format string
335 + * @...: Variable argument list.
336 + *
337 + * Should be connected to markers "MARK_NOARGS".
338 + */
339 +void marker_probe_cb_noarg(const struct marker *mdata,
340 + void *call_private, const char *fmt, ...)
341 +{
342 + va_list args; /* not initialized */
343 + char ptype;
344 +
345 + preempt_disable();
346 + ptype = ACCESS_ONCE(mdata->ptype);
347 + if (likely(!ptype)) {
348 + marker_probe_func *func;
349 + /* Must read the ptype before ptr. They are not data dependant,
350 + * so we put an explicit smp_rmb() here. */
351 + smp_rmb();
352 + func = ACCESS_ONCE(mdata->single.func);
353 + /* Must read the ptr before private data. They are not data
354 + * dependant, so we put an explicit smp_rmb() here. */
355 + smp_rmb();
356 + func(mdata->single.probe_private, call_private, fmt, &args);
357 + } else {
358 + struct marker_probe_closure *multi;
359 + int i;
360 + /*
361 + * multi points to an array, therefore accessing the array
362 + * depends on reading multi. However, even in this case,
363 + * we must insure that the pointer is read _before_ the array
364 + * data. Same as rcu_dereference, but we need a full smp_rmb()
365 + * in the fast path, so put the explicit barrier here.
366 + */
367 + smp_read_barrier_depends();
368 + multi = ACCESS_ONCE(mdata->multi);
369 + for (i = 0; multi[i].func; i++)
370 + multi[i].func(multi[i].probe_private, call_private, fmt,
371 + &args);
372 + }
373 + preempt_enable();
374 +}
375 +EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
376 +
377 +static void free_old_closure(struct rcu_head *head)
378 +{
379 + struct marker_entry *entry = container_of(head,
380 + struct marker_entry, rcu);
381 + kfree(entry->oldptr);
382 + /* Make sure we free the data before setting the pending flag to 0 */
383 + smp_wmb();
384 + entry->rcu_pending = 0;
385 +}
386 +
387 +static inline void debug_print_probes(struct marker_entry *entry)
388 +{
389 + int i;
390 +
391 + if (!marker_debug)
392 + return;
393 +
394 + if (!entry->ptype) {
395 + printk(KERN_DEBUG "Single probe : %p %p\n",
396 + entry->single.func,
397 + entry->single.probe_private);
398 + } else {
399 + for (i = 0; entry->multi[i].func; i++)
400 + printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
401 + entry->multi[i].func,
402 + entry->multi[i].probe_private);
403 + }
404 +}
405 +
406 +static struct marker_probe_closure *
407 +marker_entry_add_probe(struct marker_entry *entry,
408 + marker_probe_func *probe, void *probe_private)
409 +{
410 + int nr_probes = 0;
411 + struct marker_probe_closure *old, *new;
412 +
413 + WARN_ON(!probe);
414 +
415 + debug_print_probes(entry);
416 + old = entry->multi;
417 + if (!entry->ptype) {
418 + if (entry->single.func == probe &&
419 + entry->single.probe_private == probe_private)
420 + return ERR_PTR(-EBUSY);
421 + if (entry->single.func == __mark_empty_function) {
422 + /* 0 -> 1 probes */
423 + entry->single.func = probe;
424 + entry->single.probe_private = probe_private;
425 + entry->refcount = 1;
426 + entry->ptype = 0;
427 + debug_print_probes(entry);
428 + return NULL;
429 + } else {
430 + /* 1 -> 2 probes */
431 + nr_probes = 1;
432 + old = NULL;
433 + }
434 + } else {
435 + /* (N -> N+1), (N != 0, 1) probes */
436 + for (nr_probes = 0; old[nr_probes].func; nr_probes++)
437 + if (old[nr_probes].func == probe
438 + && old[nr_probes].probe_private
439 + == probe_private)
440 + return ERR_PTR(-EBUSY);
441 + }
442 + /* + 2 : one for new probe, one for NULL func */
443 + new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
444 + GFP_KERNEL);
445 + if (new == NULL)
446 + return ERR_PTR(-ENOMEM);
447 + if (!old)
448 + new[0] = entry->single;
449 + else
450 + memcpy(new, old,
451 + nr_probes * sizeof(struct marker_probe_closure));
452 + new[nr_probes].func = probe;
453 + new[nr_probes].probe_private = probe_private;
454 + entry->refcount = nr_probes + 1;
455 + entry->multi = new;
456 + entry->ptype = 1;
457 + debug_print_probes(entry);
458 + return old;
459 +}
460 +
461 +static struct marker_probe_closure *
462 +marker_entry_remove_probe(struct marker_entry *entry,
463 + marker_probe_func *probe, void *probe_private)
464 +{
465 + int nr_probes = 0, nr_del = 0, i;
466 + struct marker_probe_closure *old, *new;
467 +
468 + old = entry->multi;
469 +
470 + debug_print_probes(entry);
471 + if (!entry->ptype) {
472 + /* 0 -> N is an error */
473 + WARN_ON(entry->single.func == __mark_empty_function);
474 + /* 1 -> 0 probes */
475 + WARN_ON(probe && entry->single.func != probe);
476 + WARN_ON(entry->single.probe_private != probe_private);
477 + entry->single.func = __mark_empty_function;
478 + entry->refcount = 0;
479 + entry->ptype = 0;
480 + debug_print_probes(entry);
481 + return NULL;
482 + } else {
483 + /* (N -> M), (N > 1, M >= 0) probes */
484 + for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
485 + if ((!probe || old[nr_probes].func == probe)
486 + && old[nr_probes].probe_private
487 + == probe_private)
488 + nr_del++;
489 + }
490 + }
491 +
492 + if (nr_probes - nr_del == 0) {
493 + /* N -> 0, (N > 1) */
494 + entry->single.func = __mark_empty_function;
495 + entry->refcount = 0;
496 + entry->ptype = 0;
497 + } else if (nr_probes - nr_del == 1) {
498 + /* N -> 1, (N > 1) */
499 + for (i = 0; old[i].func; i++)
500 + if ((probe && old[i].func != probe) ||
501 + old[i].probe_private != probe_private)
502 + entry->single = old[i];
503 + entry->refcount = 1;
504 + entry->ptype = 0;
505 + } else {
506 + int j = 0;
507 + /* N -> M, (N > 1, M > 1) */
508 + /* + 1 for NULL */
509 + new = kzalloc((nr_probes - nr_del + 1)
510 + * sizeof(struct marker_probe_closure), GFP_KERNEL);
511 + if (new == NULL)
512 + return ERR_PTR(-ENOMEM);
513 + for (i = 0; old[i].func; i++)
514 + if ((probe && old[i].func != probe) ||
515 + old[i].probe_private != probe_private)
516 + new[j++] = old[i];
517 + entry->refcount = nr_probes - nr_del;
518 + entry->ptype = 1;
519 + entry->multi = new;
520 + }
521 + debug_print_probes(entry);
522 + return old;
523 +}
524 +
525 +/*
526 * Get marker if the marker is present in the marker hash table.
527 * Must be called with markers_mutex held.
528 * Returns NULL if not present.
529 @@ -102,8 +358,7 @@ static struct marker_entry *get_marker(c
530 * Add the marker to the marker hash table. Must be called with markers_mutex
531 * held.
532 */
533 -static int add_marker(const char *name, const char *format,
534 - marker_probe_func *probe, void *private)
535 +static struct marker_entry *add_marker(const char *name, const char *format)
536 {
537 struct hlist_head *head;
538 struct hlist_node *node;
539 @@ -118,9 +373,8 @@ static int add_marker(const char *name,
540 hlist_for_each_entry(e, node, head, hlist) {
541 if (!strcmp(name, e->name)) {
542 printk(KERN_NOTICE
543 - "Marker %s busy, probe %p already installed\n",
544 - name, e->probe);
545 - return -EBUSY; /* Already there */
546 + "Marker %s busy\n", name);
547 + return ERR_PTR(-EBUSY); /* Already there */
548 }
549 }
550 /*
551 @@ -130,34 +384,42 @@ static int add_marker(const char *name,
552 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
553 GFP_KERNEL);
554 if (!e)
555 - return -ENOMEM;
556 + return ERR_PTR(-ENOMEM);
557 memcpy(&e->name[0], name, name_len);
558 if (format) {
559 e->format = &e->name[name_len];
560 memcpy(e->format, format, format_len);
561 + if (strcmp(e->format, MARK_NOARGS) == 0)
562 + e->call = marker_probe_cb_noarg;
563 + else
564 + e->call = marker_probe_cb;
565 trace_mark(core_marker_format, "name %s format %s",
566 e->name, e->format);
567 - } else
568 + } else {
569 e->format = NULL;
570 - e->probe = probe;
571 - e->private = private;
572 + e->call = marker_probe_cb;
573 + }
574 + e->single.func = __mark_empty_function;
575 + e->single.probe_private = NULL;
576 + e->multi = NULL;
577 + e->ptype = 0;
578 e->refcount = 0;
579 + e->rcu_pending = 0;
580 hlist_add_head(&e->hlist, head);
581 - return 0;
582 + return e;
583 }
584
585 /*
586 * Remove the marker from the marker hash table. Must be called with mutex_lock
587 * held.
588 */
589 -static void *remove_marker(const char *name)
590 +static int remove_marker(const char *name)
591 {
592 struct hlist_head *head;
593 struct hlist_node *node;
594 struct marker_entry *e;
595 int found = 0;
596 size_t len = strlen(name) + 1;
597 - void *private = NULL;
598 u32 hash = jhash(name, len-1, 0);
599
600 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
601 @@ -167,12 +429,16 @@ static void *remove_marker(const char *n
602 break;
603 }
604 }
605 - if (found) {
606 - private = e->private;
607 - hlist_del(&e->hlist);
608 - kfree(e);
609 - }
610 - return private;
611 + if (!found)
612 + return -ENOENT;
613 + if (e->single.func != __mark_empty_function)
614 + return -EBUSY;
615 + hlist_del(&e->hlist);
616 + /* Make sure the call_rcu has been executed */
617 + if (e->rcu_pending)
618 + rcu_barrier();
619 + kfree(e);
620 + return 0;
621 }
622
623 /*
624 @@ -184,6 +450,7 @@ static int marker_set_format(struct mark
625 size_t name_len = strlen((*entry)->name) + 1;
626 size_t format_len = strlen(format) + 1;
627
628 +
629 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
630 GFP_KERNEL);
631 if (!e)
632 @@ -191,11 +458,20 @@ static int marker_set_format(struct mark
633 memcpy(&e->name[0], (*entry)->name, name_len);
634 e->format = &e->name[name_len];
635 memcpy(e->format, format, format_len);
636 - e->probe = (*entry)->probe;
637 - e->private = (*entry)->private;
638 + if (strcmp(e->format, MARK_NOARGS) == 0)
639 + e->call = marker_probe_cb_noarg;
640 + else
641 + e->call = marker_probe_cb;
642 + e->single = (*entry)->single;
643 + e->multi = (*entry)->multi;
644 + e->ptype = (*entry)->ptype;
645 e->refcount = (*entry)->refcount;
646 + e->rcu_pending = 0;
647 hlist_add_before(&e->hlist, &(*entry)->hlist);
648 hlist_del(&(*entry)->hlist);
649 + /* Make sure the call_rcu has been executed */
650 + if ((*entry)->rcu_pending)
651 + rcu_barrier();
652 kfree(*entry);
653 *entry = e;
654 trace_mark(core_marker_format, "name %s format %s",
655 @@ -206,7 +482,8 @@ static int marker_set_format(struct mark
656 /*
657 * Sets the probe callback corresponding to one marker.
658 */
659 -static int set_marker(struct marker_entry **entry, struct marker *elem)
660 +static int set_marker(struct marker_entry **entry, struct marker *elem,
661 + int active)
662 {
663 int ret;
664 WARN_ON(strcmp((*entry)->name, elem->name) != 0);
665 @@ -226,9 +503,43 @@ static int set_marker(struct marker_entr
666 if (ret)
667 return ret;
668 }
669 - elem->call = (*entry)->probe;
670 - elem->private = (*entry)->private;
671 - elem->state = 1;
672 +
673 + /*
674 + * probe_cb setup (statically known) is done here. It is
675 + * asynchronous with the rest of execution, therefore we only
676 + * pass from a "safe" callback (with argument) to an "unsafe"
677 + * callback (does not set arguments).
678 + */
679 + elem->call = (*entry)->call;
680 + /*
681 + * Sanity check :
682 + * We only update the single probe private data when the ptr is
683 + * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
684 + */
685 + WARN_ON(elem->single.func != __mark_empty_function
686 + && elem->single.probe_private
687 + != (*entry)->single.probe_private &&
688 + !elem->ptype);
689 + elem->single.probe_private = (*entry)->single.probe_private;
690 + /*
691 + * Make sure the private data is valid when we update the
692 + * single probe ptr.
693 + */
694 + smp_wmb();
695 + elem->single.func = (*entry)->single.func;
696 + /*
697 + * We also make sure that the new probe callbacks array is consistent
698 + * before setting a pointer to it.
699 + */
700 + rcu_assign_pointer(elem->multi, (*entry)->multi);
701 + /*
702 + * Update the function or multi probe array pointer before setting the
703 + * ptype.
704 + */
705 + smp_wmb();
706 + elem->ptype = (*entry)->ptype;
707 + elem->state = active;
708 +
709 return 0;
710 }
711
712 @@ -240,8 +551,12 @@ static int set_marker(struct marker_entr
713 */
714 static void disable_marker(struct marker *elem)
715 {
716 + /* leave "call" as is. It is known statically. */
717 elem->state = 0;
718 - elem->call = __mark_empty_function;
719 + elem->single.func = __mark_empty_function;
720 + /* Update the function before setting the ptype */
721 + smp_wmb();
722 + elem->ptype = 0; /* single probe */
723 /*
724 * Leave the private data and id there, because removal is racy and
725 * should be done only after a synchronize_sched(). These are never used
726 @@ -253,14 +568,11 @@ static void disable_marker(struct marker
727 * marker_update_probe_range - Update a probe range
728 * @begin: beginning of the range
729 * @end: end of the range
730 - * @probe_module: module address of the probe being updated
731 - * @refcount: number of references left to the given probe_module (out)
732 *
733 * Updates the probe callback corresponding to a range of markers.
734 */
735 void marker_update_probe_range(struct marker *begin,
736 - struct marker *end, struct module *probe_module,
737 - int *refcount)
738 + struct marker *end)
739 {
740 struct marker *iter;
741 struct marker_entry *mark_entry;
742 @@ -268,15 +580,12 @@ void marker_update_probe_range(struct ma
743 mutex_lock(&markers_mutex);
744 for (iter = begin; iter < end; iter++) {
745 mark_entry = get_marker(iter->name);
746 - if (mark_entry && mark_entry->refcount) {
747 - set_marker(&mark_entry, iter);
748 + if (mark_entry) {
749 + set_marker(&mark_entry, iter,
750 + !!mark_entry->refcount);
751 /*
752 * ignore error, continue
753 */
754 - if (probe_module)
755 - if (probe_module ==
756 - __module_text_address((unsigned long)mark_entry->probe))
757 - (*refcount)++;
758 } else {
759 disable_marker(iter);
760 }
761 @@ -289,20 +598,27 @@ void marker_update_probe_range(struct ma
762 * Issues a synchronize_sched() when no reference to the module passed
763 * as parameter is found in the probes so the probe module can be
764 * safely unloaded from now on.
765 + *
766 + * Internal callback only changed before the first probe is connected to it.
767 + * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
768 + * transitions. All other transitions will leave the old private data valid.
769 + * This makes the non-atomicity of the callback/private data updates valid.
770 + *
771 + * "special case" updates :
772 + * 0 -> 1 callback
773 + * 1 -> 0 callback
774 + * 1 -> 2 callbacks
775 + * 2 -> 1 callbacks
776 + * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
777 + * Side effect : marker_set_format may delete the marker entry (creating a
778 + * replacement).
779 */
780 -static void marker_update_probes(struct module *probe_module)
781 +static void marker_update_probes(void)
782 {
783 - int refcount = 0;
784 -
785 /* Core kernel markers */
786 - marker_update_probe_range(__start___markers,
787 - __stop___markers, probe_module, &refcount);
788 + marker_update_probe_range(__start___markers, __stop___markers);
789 /* Markers in modules. */
790 - module_update_markers(probe_module, &refcount);
791 - if (probe_module && refcount == 0) {
792 - synchronize_sched();
793 - deferred_sync = 0;
794 - }
795 + module_update_markers();
796 }
797
798 /**
799 @@ -310,33 +626,49 @@ static void marker_update_probes(struct
800 * @name: marker name
801 * @format: format string
802 * @probe: probe handler
803 - * @private: probe private data
804 + * @probe_private: probe private data
805 *
806 * private data must be a valid allocated memory address, or NULL.
807 * Returns 0 if ok, error value on error.
808 + * The probe address must at least be aligned on the architecture pointer size.
809 */
810 int marker_probe_register(const char *name, const char *format,
811 - marker_probe_func *probe, void *private)
812 + marker_probe_func *probe, void *probe_private)
813 {
814 struct marker_entry *entry;
815 int ret = 0;
816 + struct marker_probe_closure *old;
817
818 mutex_lock(&markers_mutex);
819 entry = get_marker(name);
820 - if (entry && entry->refcount) {
821 - ret = -EBUSY;
822 - goto end;
823 - }
824 - if (deferred_sync) {
825 - synchronize_sched();
826 - deferred_sync = 0;
827 + if (!entry) {
828 + entry = add_marker(name, format);
829 + if (IS_ERR(entry)) {
830 + ret = PTR_ERR(entry);
831 + goto end;
832 + }
833 }
834 - ret = add_marker(name, format, probe, private);
835 - if (ret)
836 + /*
837 + * If we detect that a call_rcu is pending for this marker,
838 + * make sure it's executed now.
839 + */
840 + if (entry->rcu_pending)
841 + rcu_barrier();
842 + old = marker_entry_add_probe(entry, probe, probe_private);
843 + if (IS_ERR(old)) {
844 + ret = PTR_ERR(old);
845 goto end;
846 + }
847 mutex_unlock(&markers_mutex);
848 - marker_update_probes(NULL);
849 - return ret;
850 + marker_update_probes(); /* may update entry */
851 + mutex_lock(&markers_mutex);
852 + entry = get_marker(name);
853 + WARN_ON(!entry);
854 + entry->oldptr = old;
855 + entry->rcu_pending = 1;
856 + /* write rcu_pending before calling the RCU callback */
857 + smp_wmb();
858 + call_rcu(&entry->rcu, free_old_closure);
859 end:
860 mutex_unlock(&markers_mutex);
861 return ret;
862 @@ -346,171 +678,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register)
863 /**
864 * marker_probe_unregister - Disconnect a probe from a marker
865 * @name: marker name
866 + * @probe: probe function pointer
867 + * @probe_private: probe private data
868 *
869 * Returns the private data given to marker_probe_register, or an ERR_PTR().
870 + * We do not need to call a synchronize_sched to make sure the probes have
871 + * finished running before doing a module unload, because the module unload
872 + * itself uses stop_machine(), which insures that every preempt disabled section
873 + * have finished.
874 */
875 -void *marker_probe_unregister(const char *name)
876 +int marker_probe_unregister(const char *name,
877 + marker_probe_func *probe, void *probe_private)
878 {
879 - struct module *probe_module;
880 struct marker_entry *entry;
881 - void *private;
882 + struct marker_probe_closure *old;
883 + int ret = 0;
884
885 mutex_lock(&markers_mutex);
886 entry = get_marker(name);
887 if (!entry) {
888 - private = ERR_PTR(-ENOENT);
889 + ret = -ENOENT;
890 goto end;
891 }
892 - entry->refcount = 0;
893 - /* In what module is the probe handler ? */
894 - probe_module = __module_text_address((unsigned long)entry->probe);
895 - private = remove_marker(name);
896 - deferred_sync = 1;
897 + if (entry->rcu_pending)
898 + rcu_barrier();
899 + old = marker_entry_remove_probe(entry, probe, probe_private);
900 mutex_unlock(&markers_mutex);
901 - marker_update_probes(probe_module);
902 - return private;
903 + marker_update_probes(); /* may update entry */
904 + mutex_lock(&markers_mutex);
905 + entry = get_marker(name);
906 + entry->oldptr = old;
907 + entry->rcu_pending = 1;
908 + /* write rcu_pending before calling the RCU callback */
909 + smp_wmb();
910 + call_rcu(&entry->rcu, free_old_closure);
911 + remove_marker(name); /* Ignore busy error message */
912 end:
913 mutex_unlock(&markers_mutex);
914 - return private;
915 + return ret;
916 }
917 EXPORT_SYMBOL_GPL(marker_probe_unregister);
918
919 -/**
920 - * marker_probe_unregister_private_data - Disconnect a probe from a marker
921 - * @private: probe private data
922 - *
923 - * Unregister a marker by providing the registered private data.
924 - * Returns the private data given to marker_probe_register, or an ERR_PTR().
925 - */
926 -void *marker_probe_unregister_private_data(void *private)
927 +static struct marker_entry *
928 +get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
929 {
930 - struct module *probe_module;
931 - struct hlist_head *head;
932 - struct hlist_node *node;
933 struct marker_entry *entry;
934 - int found = 0;
935 unsigned int i;
936 + struct hlist_head *head;
937 + struct hlist_node *node;
938
939 - mutex_lock(&markers_mutex);
940 for (i = 0; i < MARKER_TABLE_SIZE; i++) {
941 head = &marker_table[i];
942 hlist_for_each_entry(entry, node, head, hlist) {
943 - if (entry->private == private) {
944 - found = 1;
945 - goto iter_end;
946 + if (!entry->ptype) {
947 + if (entry->single.func == probe
948 + && entry->single.probe_private
949 + == probe_private)
950 + return entry;
951 + } else {
952 + struct marker_probe_closure *closure;
953 + closure = entry->multi;
954 + for (i = 0; closure[i].func; i++) {
955 + if (closure[i].func == probe &&
956 + closure[i].probe_private
957 + == probe_private)
958 + return entry;
959 + }
960 }
961 }
962 }
963 -iter_end:
964 - if (!found) {
965 - private = ERR_PTR(-ENOENT);
966 - goto end;
967 - }
968 - entry->refcount = 0;
969 - /* In what module is the probe handler ? */
970 - probe_module = __module_text_address((unsigned long)entry->probe);
971 - private = remove_marker(entry->name);
972 - deferred_sync = 1;
973 - mutex_unlock(&markers_mutex);
974 - marker_update_probes(probe_module);
975 - return private;
976 -end:
977 - mutex_unlock(&markers_mutex);
978 - return private;
979 + return NULL;
980 }
981 -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
982
983 /**
984 - * marker_arm - Arm a marker
985 - * @name: marker name
986 + * marker_probe_unregister_private_data - Disconnect a probe from a marker
987 + * @probe: probe function
988 + * @probe_private: probe private data
989 *
990 - * Activate a marker. It keeps a reference count of the number of
991 - * arming/disarming done.
992 - * Returns 0 if ok, error value on error.
993 + * Unregister a probe by providing the registered private data.
994 + * Only removes the first marker found in hash table.
995 + * Return 0 on success or error value.
996 + * We do not need to call a synchronize_sched to make sure the probes have
997 + * finished running before doing a module unload, because the module unload
998 + * itself uses stop_machine(), which ensures that every preempt disabled section
999 + * has finished.
1000 */
1001 -int marker_arm(const char *name)
1002 +int marker_probe_unregister_private_data(marker_probe_func *probe,
1003 + void *probe_private)
1004 {
1005 struct marker_entry *entry;
1006 int ret = 0;
1007 + struct marker_probe_closure *old;
1008
1009 mutex_lock(&markers_mutex);
1010 - entry = get_marker(name);
1011 + entry = get_marker_from_private_data(probe, probe_private);
1012 if (!entry) {
1013 ret = -ENOENT;
1014 goto end;
1015 }
1016 - /*
1017 - * Only need to update probes when refcount passes from 0 to 1.
1018 - */
1019 - if (entry->refcount++)
1020 - goto end;
1021 -end:
1022 + if (entry->rcu_pending)
1023 + rcu_barrier();
1024 + old = marker_entry_remove_probe(entry, NULL, probe_private);
1025 mutex_unlock(&markers_mutex);
1026 - marker_update_probes(NULL);
1027 - return ret;
1028 -}
1029 -EXPORT_SYMBOL_GPL(marker_arm);
1030 -
1031 -/**
1032 - * marker_disarm - Disarm a marker
1033 - * @name: marker name
1034 - *
1035 - * Disarm a marker. It keeps a reference count of the number of arming/disarming
1036 - * done.
1037 - * Returns 0 if ok, error value on error.
1038 - */
1039 -int marker_disarm(const char *name)
1040 -{
1041 - struct marker_entry *entry;
1042 - int ret = 0;
1043 -
1044 + marker_update_probes(); /* may update entry */
1045 mutex_lock(&markers_mutex);
1046 - entry = get_marker(name);
1047 - if (!entry) {
1048 - ret = -ENOENT;
1049 - goto end;
1050 - }
1051 - /*
1052 - * Only permit decrement refcount if higher than 0.
1053 - * Do probe update only on 1 -> 0 transition.
1054 - */
1055 - if (entry->refcount) {
1056 - if (--entry->refcount)
1057 - goto end;
1058 - } else {
1059 - ret = -EPERM;
1060 - goto end;
1061 - }
1062 + entry = get_marker_from_private_data(probe, probe_private);
1063 + WARN_ON(!entry);
1064 + entry->oldptr = old;
1065 + entry->rcu_pending = 1;
1066 + /* write rcu_pending before calling the RCU callback */
1067 + smp_wmb();
1068 + call_rcu(&entry->rcu, free_old_closure);
1069 + remove_marker(entry->name); /* Ignore busy error message */
1070 end:
1071 mutex_unlock(&markers_mutex);
1072 - marker_update_probes(NULL);
1073 return ret;
1074 }
1075 -EXPORT_SYMBOL_GPL(marker_disarm);
1076 +EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
1077
1078 /**
1079 * marker_get_private_data - Get a marker's probe private data
1080 * @name: marker name
1081 + * @probe: probe to match
1082 + * @num: get the nth matching probe's private data
1083 *
1084 + * Returns the nth private data pointer (starting from 0) matching, or an
1085 + * ERR_PTR.
1086 * Returns the private data pointer, or an ERR_PTR.
1087 * The private data pointer should _only_ be dereferenced if the caller is the
1088 * owner of the data, or its content could vanish. This is mostly used to
1089 * confirm that a caller is the owner of a registered probe.
1090 */
1091 -void *marker_get_private_data(const char *name)
1092 +void *marker_get_private_data(const char *name, marker_probe_func *probe,
1093 + int num)
1094 {
1095 struct hlist_head *head;
1096 struct hlist_node *node;
1097 struct marker_entry *e;
1098 size_t name_len = strlen(name) + 1;
1099 u32 hash = jhash(name, name_len-1, 0);
1100 - int found = 0;
1101 + int i;
1102
1103 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
1104 hlist_for_each_entry(e, node, head, hlist) {
1105 if (!strcmp(name, e->name)) {
1106 - found = 1;
1107 - return e->private;
1108 + if (!e->ptype) {
1109 + if (num == 0 && e->single.func == probe)
1110 + return e->single.probe_private;
1111 + else
1112 + break;
1113 + } else {
1114 + struct marker_probe_closure *closure;
1115 + int match = 0;
1116 + closure = e->multi;
1117 + for (i = 0; closure[i].func; i++) {
1118 + if (closure[i].func != probe)
1119 + continue;
1120 + if (match++ == num)
1121 + return closure[i].probe_private;
1122 + }
1123 + }
1124 }
1125 }
1126 return ERR_PTR(-ENOENT);
1127 diff -puN kernel/module.c~linux-kernel-markers-support-multiple-probes kernel/module.c
1128 --- a/kernel/module.c~linux-kernel-markers-support-multiple-probes
1129 +++ a/kernel/module.c
1130 @@ -1995,7 +1995,7 @@ static struct module *load_module(void _
1131 #ifdef CONFIG_MARKERS
1132 if (!mod->taints)
1133 marker_update_probe_range(mod->markers,
1134 - mod->markers + mod->num_markers, NULL, NULL);
1135 + mod->markers + mod->num_markers);
1136 #endif
1137 err = module_finalize(hdr, sechdrs, mod);
1138 if (err < 0)
1139 @@ -2506,7 +2506,7 @@ EXPORT_SYMBOL(struct_module);
1140 #endif
1141
1142 #ifdef CONFIG_MARKERS
1143 -void module_update_markers(struct module *probe_module, int *refcount)
1144 +void module_update_markers(void)
1145 {
1146 struct module *mod;
1147
1148 @@ -2514,8 +2514,7 @@ void module_update_markers(struct module
1149 list_for_each_entry(mod, &modules, list)
1150 if (!mod->taints)
1151 marker_update_probe_range(mod->markers,
1152 - mod->markers + mod->num_markers,
1153 - probe_module, refcount);
1154 + mod->markers + mod->num_markers);
1155 mutex_unlock(&module_mutex);
1156 }
1157 #endif
1158 diff -puN samples/markers/probe-example.c~linux-kernel-markers-support-multiple-probes samples/markers/probe-example.c
1159 --- a/samples/markers/probe-example.c~linux-kernel-markers-support-multiple-probes
1160 +++ a/samples/markers/probe-example.c
1161 @@ -20,31 +20,27 @@ struct probe_data {
1162 marker_probe_func *probe_func;
1163 };
1164
1165 -void probe_subsystem_event(const struct marker *mdata, void *private,
1166 - const char *format, ...)
1167 +void probe_subsystem_event(void *probe_data, void *call_data,
1168 + const char *format, va_list *args)
1169 {
1170 - va_list ap;
1171 /* Declare args */
1172 unsigned int value;
1173 const char *mystr;
1174
1175 /* Assign args */
1176 - va_start(ap, format);
1177 - value = va_arg(ap, typeof(value));
1178 - mystr = va_arg(ap, typeof(mystr));
1179 + value = va_arg(*args, typeof(value));
1180 + mystr = va_arg(*args, typeof(mystr));
1181
1182 /* Call printk */
1183 - printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
1184 + printk(KERN_INFO "Value %u, string %s\n", value, mystr);
1185
1186 /* or count, check rights, serialize data in a buffer */
1187 -
1188 - va_end(ap);
1189 }
1190
1191 atomic_t eventb_count = ATOMIC_INIT(0);
1192
1193 -void probe_subsystem_eventb(const struct marker *mdata, void *private,
1194 - const char *format, ...)
1195 +void probe_subsystem_eventb(void *probe_data, void *call_data,
1196 + const char *format, va_list *args)
1197 {
1198 /* Increment counter */
1199 atomic_inc(&eventb_count);
1200 @@ -72,10 +68,6 @@ static int __init probe_init(void)
1201 if (result)
1202 printk(KERN_INFO "Unable to register probe %s\n",
1203 probe_array[i].name);
1204 - result = marker_arm(probe_array[i].name);
1205 - if (result)
1206 - printk(KERN_INFO "Unable to arm probe %s\n",
1207 - probe_array[i].name);
1208 }
1209 return 0;
1210 }
1211 @@ -85,7 +77,8 @@ static void __exit probe_fini(void)
1212 int i;
1213
1214 for (i = 0; i < ARRAY_SIZE(probe_array); i++)
1215 - marker_probe_unregister(probe_array[i].name);
1216 + marker_probe_unregister(probe_array[i].name,
1217 + probe_array[i].probe_func, &probe_array[i]);
1218 printk(KERN_INFO "Number of event b : %u\n",
1219 atomic_read(&eventb_count));
1220 }
1221 _
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.