#include <linux/types.h>
#include <linux/ring_buffer.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/cpumask.h>
static DEFINE_PER_CPU(local_t, _stp_cpu_disabled);
static inline void _stp_ring_buffer_disable_cpu(void)
{
        preempt_disable();
        local_inc(&__get_cpu_var(_stp_cpu_disabled));
}
static inline void _stp_ring_buffer_enable_cpu(void)
{
        local_dec(&__get_cpu_var(_stp_cpu_disabled));
        preempt_enable();
}
static inline int _stp_ring_buffer_cpu_disabled(void)
{
        return local_read(&__get_cpu_var(_stp_cpu_disabled));
}
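/* Note: the per-cpu _stp_cpu_disabled counter above acts as a
 * reentrancy guard: while a cpu is reading or consuming events, new
 * reservations on that cpu are refused (see _stp_data_write_reserve()
 * below), so the reader and writer don't interleave on the same
 * per-cpu buffer. */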
#ifndef STP_RELAY_TIMER_INTERVAL
/* Wakeup timer interval in jiffies (default 10 ms) */
#define STP_RELAY_TIMER_INTERVAL        ((HZ + 99) / 100)
#endif
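/* Example of the interval arithmetic: with HZ == 250, ((HZ + 99) /
 * 100) == (349 / 100) == 3 jiffies, i.e. 12 ms.  The "+ 99" rounds
 * up, so the interval can never truncate to 0 jiffies even when
 * HZ < 100. */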
struct _stp_data_entry {
        size_t                  len;
        unsigned char           buf[];
};
/*
 * Trace iterator - used by printout routines that present trace
 * results to users, and which might sleep, etc:
 */
struct _stp_iterator {
        int                     cpu_file;
        int                     cpu;
        u64                     ts;
        atomic_t                nr_events;
        struct ring_buffer_iter *buffer_iter[NR_CPUS];
};
#ifdef STP_BULKMODE
/* In bulk mode, we need 1 'struct _stp_iterator' for each cpu.  In
 * 'normal' mode, we only need 1 'struct _stp_iterator' (since all
 * output is sent through 1 file). */
#define NR_ITERS NR_CPUS
#else
#define NR_ITERS 1
#endif
struct _stp_relay_data_type {
        atomic_t /* enum _stp_transport_state */ transport_state;
        struct ring_buffer *rb;
        struct _stp_iterator iter[NR_ITERS];
        cpumask_var_t trace_reader_cpumask;
        struct timer_list timer;
        int overwrite_flag;
};
static struct _stp_relay_data_type _stp_relay_data;
/* _stp_poll_wait is a waitqueue for tasks blocked on
 * _stp_data_poll_trace() */
static DECLARE_WAIT_QUEUE_HEAD(_stp_poll_wait);
static void __stp_free_ring_buffer(void)
{
        free_cpumask_var(_stp_relay_data.trace_reader_cpumask);
        if (_stp_relay_data.rb)
                ring_buffer_free(_stp_relay_data.rb);
        _stp_relay_data.rb = NULL;
}
static int __stp_alloc_ring_buffer(void)
{
        unsigned long buffer_size = _stp_bufsize;

        if (!alloc_cpumask_var(&_stp_relay_data.trace_reader_cpumask,
                               GFP_KERNEL))
                goto fail;
        cpumask_clear(_stp_relay_data.trace_reader_cpumask);

        if (buffer_size == 0) {
                dbug_trans(1, "using default buffer size...\n");
                buffer_size = _stp_nsubbufs * _stp_subbuf_size;
        }
        /* The number passed to ring_buffer_alloc() is per cpu.  Our
         * 'buffer_size' is a total number of bytes to allocate.  So,
         * we need to divide buffer_size by the number of cpus. */
        buffer_size /= num_online_cpus();
        dbug_trans(1, "%lu\n", buffer_size);
        _stp_relay_data.rb = ring_buffer_alloc(buffer_size, 0);
        if (!_stp_relay_data.rb)
                goto fail;

        dbug_trans(0, "size = %lu\n", ring_buffer_size(_stp_relay_data.rb));
        return 0;

fail:
        __stp_free_ring_buffer();
        return -ENOMEM;
}
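/* Sizing example (values illustrative, not from this code): if
 * _stp_bufsize is 0 and _stp_nsubbufs * _stp_subbuf_size works out to
 * 4 MB on a 4-cpu machine, each cpu's ring buffer receives 4 MB / 4 =
 * 1 MB, since ring_buffer_alloc() interprets its size argument
 * per cpu. */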
static int _stp_data_open_trace(struct inode *inode, struct file *file)
{
        struct _stp_iterator *iter = inode->i_private;
#ifdef STP_BULKMODE
        int cpu_file = iter->cpu_file;
#endif

        /* We only allow for one reader per cpu */
        dbug_trans(1, "trace attach\n");
#ifdef STP_BULKMODE
        if (!cpumask_test_cpu(cpu_file, _stp_relay_data.trace_reader_cpumask))
                cpumask_set_cpu(cpu_file, _stp_relay_data.trace_reader_cpumask);
        else {
                dbug_trans(1, "returning EBUSY\n");
                return -EBUSY;
        }
#else
        if (!cpumask_empty(_stp_relay_data.trace_reader_cpumask)) {
                dbug_trans(1, "returning EBUSY\n");
                return -EBUSY;
        }
        cpumask_setall(_stp_relay_data.trace_reader_cpumask);
#endif
        file->private_data = inode->i_private;
        return 0;
}
static int _stp_data_release_trace(struct inode *inode, struct file *file)
{
        struct _stp_iterator *iter = inode->i_private;

        dbug_trans(1, "trace detach\n");
#ifdef STP_BULKMODE
        cpumask_clear_cpu(iter->cpu_file, _stp_relay_data.trace_reader_cpumask);
#else
        cpumask_clear(_stp_relay_data.trace_reader_cpumask);
#endif
        return 0;
}
static ssize_t
_stp_event_to_user(struct ring_buffer_event *event, char __user *ubuf,
                   size_t cnt)
{
        int ret;
        struct _stp_data_entry *entry;

        dbug_trans(1, "event(%p), ubuf(%p), cnt(%lu)\n", event, ubuf, cnt);
        if (event == NULL || ubuf == NULL) {
                dbug_trans(1, "returning -EFAULT(1)\n");
                return -EFAULT;
        }

        entry = ring_buffer_event_data(event);
        if (entry == NULL) {
                dbug_trans(1, "returning -EFAULT(2)\n");
                return -EFAULT;
        }

        /* We don't do partial entries - just fail. */
        if (entry->len > cnt) {
                dbug_trans(1, "returning -EBUSY\n");
                return -EBUSY;
        }

#if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2)
        {
                char *last = entry->buf + (entry->len - 5);
                dbug_trans2("copying %.5s...%.5s\n", entry->buf, last);
        }
#endif

        if (cnt > entry->len)
                cnt = entry->len;
        ret = copy_to_user(ubuf, entry->buf, cnt);
        if (ret) {
                dbug_trans(1, "returning -EFAULT(3)\n");
                return -EFAULT;
        }

        return cnt;
}
static int _stp_ring_buffer_empty_cpu(struct _stp_iterator *iter)
{
        int cpu;

#ifdef STP_BULKMODE
        cpu = iter->cpu_file;
        if (iter->buffer_iter[cpu]) {
                if (ring_buffer_iter_empty(iter->buffer_iter[cpu]))
                        return 1;
                return 0;
        }
        if (atomic_read(&iter->nr_events) == 0)
                return 1;
        return 0;
#else
        for_each_online_cpu(cpu) {
                if (iter->buffer_iter[cpu]) {
                        if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
                                return 0;
                }
                else {
                        if (atomic_read(&iter->nr_events) != 0)
                                return 0;
                }
        }
        return 1;
#endif
}
static int _stp_ring_buffer_empty(void)
{
        struct _stp_iterator *iter;
#ifdef STP_BULKMODE
        int cpu;

        for_each_possible_cpu(cpu) {
                iter = &_stp_relay_data.iter[cpu];
                if (! _stp_ring_buffer_empty_cpu(iter))
                        return 0;
        }
        return 1;
#else
        iter = &_stp_relay_data.iter[0];
        return _stp_ring_buffer_empty_cpu(iter);
#endif
}
static void _stp_ring_buffer_iterator_increment(struct _stp_iterator *iter)
{
        if (iter->buffer_iter[iter->cpu]) {
                _stp_ring_buffer_disable_cpu();
                ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
                _stp_ring_buffer_enable_cpu();
        }
}
static void _stp_ring_buffer_consume(struct _stp_iterator *iter)
{
        _stp_ring_buffer_iterator_increment(iter);
        _stp_ring_buffer_disable_cpu();
        ring_buffer_consume(_stp_relay_data.rb, iter->cpu, &iter->ts);
        _stp_ring_buffer_enable_cpu();
        atomic_dec(&iter->nr_events);
}
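/* Reading an event is thus a two-step dance: the iterator is first
 * advanced past the event (so a subsequent peek sees the next one),
 * then ring_buffer_consume() actually removes the event from the
 * buffer and nr_events is decremented to keep the producer/consumer
 * accounting straight. */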
static ssize_t
_stp_tracing_wait_pipe(struct file *filp)
{
        struct _stp_iterator *iter = filp->private_data;

        if (atomic_read(&iter->nr_events) == 0) {
                if ((filp->f_flags & O_NONBLOCK)) {
                        dbug_trans(1, "returning -EAGAIN\n");
                        return -EAGAIN;
                }

                if (signal_pending(current)) {
                        dbug_trans(1, "returning -EINTR\n");
                        return -EINTR;
                }
                dbug_trans(1, "returning 0\n");
                return 0;
        }

        dbug_trans(1, "returning 1\n");
        return 1;
}
static struct ring_buffer_event *
_stp_peek_next_event(struct _stp_iterator *iter, int cpu, u64 *ts)
{
        struct ring_buffer_event *event;

        _stp_ring_buffer_disable_cpu();
        if (iter->buffer_iter[cpu])
                event = ring_buffer_iter_peek(iter->buffer_iter[cpu], ts);
        else
                event = ring_buffer_peek(_stp_relay_data.rb, cpu, ts);
        _stp_ring_buffer_enable_cpu();
        return event;
}
/* Find the next real event */
static struct ring_buffer_event *
_stp_find_next_event(struct _stp_iterator *iter)
{
        struct ring_buffer_event *event;

#ifdef STP_BULKMODE
        int cpu_file = iter->cpu_file;

        /*
         * If we are in a per_cpu trace file, don't bother iterating
         * over all cpus - peek directly.
         */
        if (iter->buffer_iter[cpu_file] == NULL) {
                if (atomic_read(&iter->nr_events) == 0)
                        return NULL;
        }
        else {
                if (ring_buffer_iter_empty(iter->buffer_iter[cpu_file]))
                        return NULL;
        }
        event = _stp_peek_next_event(iter, cpu_file, &iter->ts);
        return event;
#else
        struct ring_buffer_event *next = NULL;
        u64 next_ts = 0, ts;
        int next_cpu = -1;
        int cpu;

        for_each_online_cpu(cpu) {
                if (iter->buffer_iter[cpu] == NULL) {
                        if (atomic_read(&iter->nr_events) == 0)
                                continue;
                }
                else {
                        if (ring_buffer_iter_empty(iter->buffer_iter[cpu]))
                                continue;
                }
                event = _stp_peek_next_event(iter, cpu, &ts);

                /*
                 * Pick the event with the smallest timestamp:
                 */
                if (event && (!next || ts < next_ts)) {
                        next = event;
                        next_cpu = cpu;
                        next_ts = ts;
                }
        }

        iter->cpu = next_cpu;
        iter->ts = next_ts;
        return next;
#endif
}
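/* In effect this is a k-way merge across the per-cpu buffers: each
 * pass peeks at the head event of every online cpu and selects the
 * globally oldest timestamp, so the single trace file presents events
 * in (approximate) time order even though they were recorded
 * per cpu. */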
static void
_stp_buffer_iter_finish(struct _stp_iterator *iter)
{
#ifdef STP_BULKMODE
        int cpu_file = iter->cpu_file;

        if (iter->buffer_iter[cpu_file]) {
                ring_buffer_read_finish(iter->buffer_iter[cpu_file]);
                iter->buffer_iter[cpu_file] = NULL;
        }
#else
        int cpu;

        for_each_possible_cpu(cpu) {
                if (iter->buffer_iter[cpu]) {
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
                        iter->buffer_iter[cpu] = NULL;
                }
        }
#endif
        dbug_trans(0, "iterator(s) finished\n");
}
static int
_stp_buffer_iter_start(struct _stp_iterator *iter)
{
#ifdef STP_BULKMODE
        int cpu_file = iter->cpu_file;

        iter->buffer_iter[cpu_file]
                = ring_buffer_read_start(_stp_relay_data.rb, cpu_file);
        if (iter->buffer_iter[cpu_file] == NULL) {
                dbug_trans(0, "buffer_iter[%d] was NULL\n", cpu_file);
                return 1;
        }
        dbug_trans(0, "iterator(s) started\n");
        return 0;
#else
        int cpu;

        for_each_online_cpu(cpu) {
                iter->buffer_iter[cpu]
                        = ring_buffer_read_start(_stp_relay_data.rb, cpu);
                if (iter->buffer_iter[cpu] == NULL) {
                        dbug_trans(0, "buffer_iter[%d] was NULL\n", cpu);
                        _stp_buffer_iter_finish(iter);
                        return 1;
                }
        }
        dbug_trans(0, "iterator(s) started\n");
        return 0;
#endif
}
static ssize_t
_stp_data_read_trace(struct file *filp, char __user *ubuf,
                     size_t cnt, loff_t *ppos)
{
        ssize_t sret;
        ssize_t len;
        struct ring_buffer_event *event;
        struct _stp_iterator *iter = filp->private_data;
#ifdef STP_BULKMODE
        int cpu_file = iter->cpu_file;
#endif

        dbug_trans(1, "%lu\n", (unsigned long)cnt);
        sret = _stp_tracing_wait_pipe(filp);
        dbug_trans(1, "_stp_tracing_wait_pipe returned %ld\n", sret);
        if (sret <= 0)
                goto out;

        if (cnt >= PAGE_SIZE)
                cnt = PAGE_SIZE - 1;

        dbug_trans(1, "sret = %lu\n", (unsigned long)sret);
        sret = 0;
        if (_stp_buffer_iter_start(iter))
                goto out;
        while ((event = _stp_find_next_event(iter)) != NULL) {
                _stp_buffer_iter_finish(iter);
                len = _stp_event_to_user(event, ubuf, cnt);
                if (len <= 0)
                        break;

                _stp_ring_buffer_consume(iter);
                dbug_trans(1, "event consumed\n");
                ubuf += len;
                cnt -= len;
                sret += len;
                if (cnt <= 0)
                        break;
                if (_stp_buffer_iter_start(iter))
                        break;
        }
        _stp_buffer_iter_finish(iter);
out:
        return sret;
}
static unsigned int
_stp_data_poll_trace(struct file *filp, poll_table *poll_table)
{
        struct _stp_iterator *iter = filp->private_data;

        dbug_trans(1, "entry\n");
        if (! _stp_ring_buffer_empty_cpu(iter))
                return POLLIN | POLLRDNORM;
        poll_wait(filp, &_stp_poll_wait, poll_table);
        if (! _stp_ring_buffer_empty_cpu(iter))
                return POLLIN | POLLRDNORM;

        dbug_trans(1, "exit\n");
        return 0;
}
static struct file_operations __stp_data_fops = {
        .owner = THIS_MODULE,
        .open = _stp_data_open_trace,
        .release = _stp_data_release_trace,
        .poll = _stp_data_poll_trace,
        .read = _stp_data_read_trace,
};
static struct _stp_iterator *_stp_get_iterator(void)
{
#ifdef STP_BULKMODE
        int cpu = raw_smp_processor_id();
        return &_stp_relay_data.iter[cpu];
#else
        return &_stp_relay_data.iter[0];
#endif
}
/*
 * Here's how __STP_MAX_RESERVE_SIZE is figured.  The value of
 * BUF_PAGE_SIZE was taken from the kernel's ring_buffer code.  It
 * is divided by 4, so we waste a maximum of 1/4 of the buffer (in
 * the case of a small reservation).
 */
#define __STP_MAX_RESERVE_SIZE ((/*BUF_PAGE_SIZE*/ 4080 / 4)            \
                                - sizeof(struct _stp_data_entry)        \
                                - sizeof(struct ring_buffer_event))
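/* Worked example (struct sizes are illustrative; they vary by kernel
 * version and architecture): 4080 / 4 == 1020 bytes.  With, say, an
 * 8-byte struct _stp_data_entry header and a 12-byte struct
 * ring_buffer_event, the largest single reservation would be
 * 1020 - 8 - 12 = 1000 bytes of payload. */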
/*
 * This function prepares the cpu buffer to write a sample.
 *
 * It reserves space in the ring buffer for a 'struct _stp_data_entry'
 * whose data array is (up to) 'size_request' bytes.  Use
 * _stp_data_write_commit() after filling in the sample.  On error 0
 * is returned and '*entry' is set to NULL; otherwise the number of
 * bytes actually reserved is returned and '*entry' holds the handle
 * to pass to _stp_data_entry_data() and _stp_data_write_commit().
 */
static size_t
_stp_data_write_reserve(size_t size_request, void **entry)
{
        struct ring_buffer_event *event;
        struct _stp_data_entry *sde;
        struct _stp_iterator *iter = _stp_get_iterator();

        if (entry == NULL)
                return -EINVAL;

        if (size_request > __STP_MAX_RESERVE_SIZE) {
                size_request = __STP_MAX_RESERVE_SIZE;
        }

        if (_stp_ring_buffer_cpu_disabled()) {
                dbug_trans(0, "cpu disabled\n");
                *entry = NULL;
                return 0;
        }

#ifdef STAPCONF_RING_BUFFER_FLAGS
        event = ring_buffer_lock_reserve(_stp_relay_data.rb,
                                         (sizeof(struct _stp_data_entry)
                                          + size_request), 0);
#else
        event = ring_buffer_lock_reserve(_stp_relay_data.rb,
                                         (sizeof(struct _stp_data_entry)
                                          + size_request));
#endif
        if (unlikely(! event)) {
                dbug_trans(0, "event = NULL (%p)?\n", event);
                if (! _stp_relay_data.overwrite_flag) {
                        *entry = NULL;
                        return 0;
                }

                if (_stp_buffer_iter_start(iter)) {
                        *entry = NULL;
                        return 0;
                }

                /* If we're in overwrite mode and all the buffers are
                 * full, take an event out of the buffer and consume it
                 * (throw it away).  This should make room for the new
                 * data. */
                event = _stp_find_next_event(iter);
                if (event) {
                        sde = ring_buffer_event_data(event);
                        if (sde->len < size_request)
                                size_request = sde->len;
                        _stp_ring_buffer_consume(iter);
                        _stp_buffer_iter_finish(iter);

                        /* Try to reserve again. */
#ifdef STAPCONF_RING_BUFFER_FLAGS
                        event = ring_buffer_lock_reserve(_stp_relay_data.rb,
                                                         sizeof(struct _stp_data_entry) + size_request,
                                                         0);
#else
                        event = ring_buffer_lock_reserve(_stp_relay_data.rb,
                                                         sizeof(struct _stp_data_entry) + size_request);
#endif
                        dbug_trans(0, "overwritten event = 0x%p\n", event);
                }
                else {
                        _stp_buffer_iter_finish(iter);
                }

                if (unlikely(! event)) {
                        *entry = NULL;
                        return 0;
                }
        }

        sde = ring_buffer_event_data(event);
        sde->len = size_request;

        *entry = event;
        return size_request;
}
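/* A minimal usage sketch of the reserve/commit pair (hypothetical
 * caller, not part of this file):
 *
 *      void *entry;
 *      size_t n = _stp_data_write_reserve(len, &entry);
 *      if (n > 0 && entry != NULL) {
 *              memcpy(_stp_data_entry_data(entry), buf, n);
 *              _stp_data_write_commit(entry);
 *      }
 *
 * Note that the reservation may be silently truncated to
 * __STP_MAX_RESERVE_SIZE, so callers must write at most the returned
 * 'n' bytes. */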
static unsigned char *_stp_data_entry_data(void *entry)
{
        struct ring_buffer_event *event = entry;
        struct _stp_data_entry *sde;

        if (event == NULL)
                return NULL;

        sde = ring_buffer_event_data(event);
        return sde->buf;
}
static int _stp_data_write_commit(void *entry)
{
        struct ring_buffer_event *event = (struct ring_buffer_event *)entry;

        if (unlikely(! entry)) {
                dbug_trans(1, "entry = NULL, returning -EINVAL\n");
                return -EINVAL;
        }

#if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2)
        {
                struct _stp_data_entry *sde = ring_buffer_event_data(event);
                char *last = sde->buf + (sde->len - 5);
                dbug_trans2("committing %.5s...%.5s\n", sde->buf, last);
        }
#endif

        atomic_inc(&(_stp_get_iterator()->nr_events));

#ifdef STAPCONF_RING_BUFFER_FLAGS
        return ring_buffer_unlock_commit(_stp_relay_data.rb, event, 0);
#else
        return ring_buffer_unlock_commit(_stp_relay_data.rb, event);
#endif
}
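/* Design note: nr_events is bumped before the commit, so a concurrent
 * reader polling on nr_events may momentarily see an event that is
 * not yet visible in the ring buffer; the wakeup timer below smooths
 * this over by re-checking _stp_ring_buffer_empty() on every tick. */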
static void __stp_relay_wakeup_timer(unsigned long val)
{
        if (waitqueue_active(&_stp_poll_wait) && ! _stp_ring_buffer_empty())
                wake_up_interruptible(&_stp_poll_wait);
        if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING)
                mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL);
        else
                dbug_trans(0, "ring_buffer wakeup timer expiry\n");
}
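/* The transport uses a periodic timer rather than waking readers from
 * the probe (write) path: probes may fire in contexts where taking
 * the waitqueue lock is unsafe, so readers are instead woken at most
 * every STP_RELAY_TIMER_INTERVAL jiffies once data is pending. */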
static void __stp_relay_timer_start(void)
{
        init_timer(&_stp_relay_data.timer);
        _stp_relay_data.timer.expires = jiffies + STP_RELAY_TIMER_INTERVAL;
        _stp_relay_data.timer.function = __stp_relay_wakeup_timer;
        _stp_relay_data.timer.data = 0;
        add_timer(&_stp_relay_data.timer);
}
static void __stp_relay_timer_stop(void)
{
        del_timer_sync(&_stp_relay_data.timer);
}
static struct dentry *__stp_entry[NR_CPUS] = { NULL };
static int _stp_transport_data_fs_init(void)
{
        int rc;
        int cpu, cpu2;

        atomic_set(&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED);
        _stp_relay_data.overwrite_flag = 0;
        _stp_relay_data.rb = NULL;

        /* Allocate the ring buffer. */
        dbug_trans(1, "entry...\n");
        rc = __stp_alloc_ring_buffer();
        if (rc != 0)
                return rc;

        /* Create the debugfs file(s). */
        for_each_online_cpu(cpu) {
                char cpu_file[9];       /* 5(trace) + 3(XXX) + 1(\0) = 9 */

                if (cpu > 999 || cpu < 0) {
                        _stp_transport_data_fs_close();
                        return -EINVAL;
                }
                snprintf(cpu_file, sizeof(cpu_file), "trace%d", cpu);
                __stp_entry[cpu] = debugfs_create_file(cpu_file, 0600,
                                                       _stp_get_module_dir(),
                                                       NULL,
                                                       &__stp_data_fops);

                if (!__stp_entry[cpu]) {
                        pr_warning("Could not create debugfs 'trace' entry\n");
                        __stp_free_ring_buffer();
                        return -ENOENT;
                }
                else if (IS_ERR(__stp_entry[cpu])) {
                        rc = PTR_ERR(__stp_entry[cpu]);
                        pr_warning("Could not create debugfs 'trace' entry\n");
                        __stp_free_ring_buffer();
                        return rc;
                }

                __stp_entry[cpu]->d_inode->i_uid = _stp_uid;
                __stp_entry[cpu]->d_inode->i_gid = _stp_gid;
                __stp_entry[cpu]->d_inode->i_private = &_stp_relay_data.iter[cpu];

#ifndef STP_BULKMODE
                if (cpu != 0)
                        break;
#endif
        }

        /* Initialize the iterator(s).  Note the loop is bounded by
         * NR_ITERS, since in non-bulk mode only iter[0] exists. */
        for (cpu = 0; cpu < NR_ITERS; cpu++) {
                _stp_relay_data.iter[cpu].cpu_file = cpu;
                _stp_relay_data.iter[cpu].cpu = cpu;
                _stp_relay_data.iter[cpu].ts = 0;

                for_each_possible_cpu(cpu2) {
                        _stp_relay_data.iter[cpu].buffer_iter[cpu2] = NULL;
                }
                atomic_set(&_stp_relay_data.iter[cpu].nr_events, 0);
        }

        dbug_trans(1, "returning 0...\n");
        atomic_set(&_stp_relay_data.transport_state, STP_TRANSPORT_INITIALIZED);
        return 0;
}
static void _stp_transport_data_fs_start(void)
{
        if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_INITIALIZED) {
                atomic_set(&_stp_relay_data.transport_state, STP_TRANSPORT_RUNNING);
                __stp_relay_timer_start();
        }
}
static void _stp_transport_data_fs_stop(void)
{
        if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) {
                atomic_set(&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED);
                __stp_relay_timer_stop();
        }
}
static void _stp_transport_data_fs_close(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                if (__stp_entry[cpu])
                        debugfs_remove(__stp_entry[cpu]);
                __stp_entry[cpu] = NULL;
        }

        __stp_free_ring_buffer();
}
static enum _stp_transport_state _stp_transport_get_state(void)
{
        return atomic_read(&_stp_relay_data.transport_state);
}
static void _stp_transport_data_fs_overwrite(int overwrite)
{
        dbug_trans(0, "setting overwrite to %d\n", overwrite);
        _stp_relay_data.overwrite_flag = overwrite;
}