From 4eea0069b8b677657dcfea3effa2f342d0e5d27f Mon Sep 17 00:00:00 2001 From: Prerna Saxena Date: Sun, 22 Nov 2009 23:40:39 +0530 Subject: [PATCH] Added tracepoint-based probes to block IO and IO scheduler tapsets. Added testcases for these probes. Also, modified comments in ioblock.stp so that tapset documentation can be automatically generated for this tapset. --- doc/SystemTap_Tapset_Reference/tapsets.tmpl | 5 +- tapset/ioblock.stp | 221 +++++++++++++++----- tapset/ioscheduler.stp | 191 ++++++++++++++++- testsuite/buildok/ioblock_test.stp | 7 + testsuite/buildok/ioscheduler.stp | 13 ++ 5 files changed, 385 insertions(+), 52 deletions(-) diff --git a/doc/SystemTap_Tapset_Reference/tapsets.tmpl b/doc/SystemTap_Tapset_Reference/tapsets.tmpl index addcf88d3..bb855d29d 100644 --- a/doc/SystemTap_Tapset_Reference/tapsets.tmpl +++ b/doc/SystemTap_Tapset_Reference/tapsets.tmpl @@ -167,12 +167,13 @@ - IO Scheduler Tapset + IO Scheduler and block IO Tapset - This family of probe points is used to probe IO scheduler activities. + This family of probe points is used to probe block IO layer and IO scheduler activities. It contains the following probe points: !Itapset/ioscheduler.stp +!Itapset/ioblock.stp diff --git a/tapset/ioblock.stp b/tapset/ioblock.stp index bc64c4258..1bc06699a 100644 --- a/tapset/ioblock.stp +++ b/tapset/ioblock.stp @@ -78,18 +78,13 @@ function __bio_devname:string(bio:long) global BIO_READ = 0, BIO_WRITE = 1 -/* probe ioblock.request +/** + * probe ioblock.request - Fires whenever making a generic block I/O request. * - * Fires whenever making a generic block I/O request. 
- * - * Context: - * The process makes block I/O request - * - * Variables: - * devname - block device name - * ino - i-node number of the mapped file - * sector - beginning sector for the entire bio - * flags - see below + * @devname - block device name + * @ino - i-node number of the mapped file + * @sector - beginning sector for the entire bio + * @flags - see below * BIO_UPTODATE 0 ok after I/O completion * BIO_RW_BLOCK 1 RW_AHEAD set, and read/write would block * BIO_EOF 2 out-out-bounds error @@ -99,20 +94,18 @@ global BIO_READ = 0, BIO_WRITE = 1 * BIO_USER_MAPPED 6 contains user pages * BIO_EOPNOTSUPP 7 not supported * - * rw - binary trace for read/write request - * vcnt - bio vector count which represents number of array element (page, - * offset, length) which make up this I/O request - * idx - offset into the bio vector array - * phys_segments - number of segments in this bio after physical address - * coalescing is performed. - * hw_segments - number of segments after physical and DMA remapping - * hardware coalescing is performed - * size - total size in bytes - * bdev - target block device - * bdev_contains - points to the device object which contains the - * partition (when bio structure represents a partition) - * p_start_sect - points to the start sector of the partition - * structure of the device + * @rw - binary trace for read/write request + * @vcnt - bio vector count which represents number of array element (page, offset, length) which make up this I/O request + * @idx - offset into the bio vector array + * @phys_segments - number of segments in this bio after physical address coalescing is performed + * @hw_segments - number of segments after physical and DMA remapping hardware coalescing is performed + * @size - total size in bytes + * @bdev - target block device + * @bdev_contains - points to the device object which contains the partition (when bio structure represents a partition) + * @p_start_sect - points to the start sector of the 
partition structure of the device + * + * Context: + * The process makes block I/O request */ probe ioblock.request = kernel.function ("generic_make_request") { @@ -135,19 +128,14 @@ probe ioblock.request = kernel.function ("generic_make_request") p_start_sect = __bio_start_sect($bio) } -/* probe ioblock.end - * - * Fires whenever a block I/O transfer is complete. - * - * Context: - * The process signals the transfer is done. +/** + * probe ioblock.end - Fires whenever a block I/O transfer is complete. * - * Variables: - * devname - block device name - * ino - i-node number of the mapped file - * byte_done - number of bytes transferred - * sector - beginning sector for the entire bio - * flags - see below + * @devname - block device name + * @ino - i-node number of the mapped file + * @bytes_done - number of bytes transferred + * @sector - beginning sector for the entire bio + * @flags - see below * BIO_UPTODATE 0 ok after I/O completion * BIO_RW_BLOCK 1 RW_AHEAD set, and read/write would block * BIO_EOF 2 out-out-bounds error @@ -156,16 +144,16 @@ probe ioblock.request = kernel.function ("generic_make_request") * BIO_BOUNCED 5 bio is a bounce bio * BIO_USER_MAPPED 6 contains user pages * BIO_EOPNOTSUPP 7 not supported - * error - 0 on success - * rw - binary trace for read/write request - * vcnt - bio vector count which represents number of array element (page, - * offset, length) which makes up this I/O request - * idx - offset into the bio vector array - * phys_segments - number of segments in this bio after physical address - * coalescing is performed. 
- * hw_segments - number of segments after physical and DMA remapping - * hardware coalescing is performed - * size - total size in bytes + * @error - 0 on success + * @rw - binary trace for read/write request + * @vcnt - bio vector count which represents number of array element (page, offset, length) which makes up this I/O request + * @idx - offset into the bio vector array + * @phys_segments - number of segments in this bio after physical address coalescing is performed. + * @hw_segments - number of segments after physical and DMA remapping hardware coalescing is performed + * @size - total size in bytes + * + * Context: + * The process signals the transfer is done. */ probe ioblock.end = kernel.function("bio_endio") { @@ -186,3 +174,142 @@ probe ioblock.end = kernel.function("bio_endio") %) size = $bio->bi_size } + +/** + * probe ioblock_trace.bounce - Fires whenever a buffer bounce is needed for at least one page of a block IO request. + * + * @bio struct bio * + * @q struct request_queue* + * @devname device for which a buffer bounce was needed. + * @ino - i-node number of the mapped file + * @bytes_done - number of bytes transferred + * @sector - beginning sector for the entire bio + * @flags - see below + * BIO_UPTODATE 0 ok after I/O completion + * BIO_RW_BLOCK 1 RW_AHEAD set, and read/write would block + * BIO_EOF 2 out-out-bounds error + * BIO_SEG_VALID 3 nr_hw_seg valid + * BIO_CLONED 4 doesn't own data + * BIO_BOUNCED 5 bio is a bounce bio + * BIO_USER_MAPPED 6 contains user pages + * BIO_EOPNOTSUPP 7 not supported + * @error - 0 on success + * @rw - binary trace for read/write request + * @vcnt - bio vector count which represents number of array element (page, offset, length) which makes up this I/O request + * @idx - offset into the bio vector array + * @phys_segments - number of segments in this bio after physical address coalescing is performed. + * @size - total size in bytes + * + * Context : + * The process creating a block IO request. 
+ */ +probe ioblock_trace.bounce = kernel.trace("block_bio_bounce") +{ + devname = __bio_devname($bio) + ino = __bio_ino($bio) + + bytes_done = $bio->bi_size + sector = $bio->bi_sector + flags = $bio->bi_flags + rw = $bio->bi_rw + vcnt = $bio->bi_vcnt + idx = $bio->bi_idx + phys_segments = $bio->bi_phys_segments + size = $bio->bi_size +} + +/** + * probe ioblock_trace.request - Fires just as a generic block I/O request is created for a bio. + * + * @bio struct bio* for which IO request is to be submitted + * @q struct request_queue* to which the request is to be added + * @devname - block device name + * @ino - i-node number of the mapped file + * @sector - beginning sector for the entire bio + * @flags - see below + * BIO_UPTODATE 0 ok after I/O completion + * BIO_RW_BLOCK 1 RW_AHEAD set, and read/write would block + * BIO_EOF 2 out-out-bounds error + * BIO_SEG_VALID 3 nr_hw_seg valid + * BIO_CLONED 4 doesn't own data + * BIO_BOUNCED 5 bio is a bounce bio + * BIO_USER_MAPPED 6 contains user pages + * BIO_EOPNOTSUPP 7 not supported + * + * @rw - binary trace for read/write request + * @vcnt - bio vector count which represents number of array element (page, offset, length) which make up this I/O request + * @idx - offset into the bio vector array + * @phys_segments - number of segments in this bio after physical address coalescing is performed. 
+ * @size - total size in bytes + * @bdev - target block device + * @bdev_contains - points to the device object which contains the partition (when bio structure represents a partition) + * @p_start_sect - points to the start sector of the partition structure of the device + * + * Context: + * The process makes block I/O request + */ + +probe ioblock_trace.request = kernel.trace("block_bio_queue") +{ + devname = __bio_devname($bio) + ino = __bio_ino($bio) + + bytes_done = $bio->bi_size + error = $error + sector = $bio->bi_sector + flags = $bio->bi_flags + rw = $bio->bi_rw + vcnt = $bio->bi_vcnt + idx = $bio->bi_idx + phys_segments = $bio->bi_phys_segments + size = $bio->bi_size + bdev_contains = $bio->bi_bdev->bd_contains + bdev = $bio->bi_bdev + p_start_sect = __bio_start_sect($bio) +} + +/** + * probe ioblock_trace.end - Fires whenever a block I/O transfer is complete. + * + * @q - request queue on which this bio was queued. + * @devname - block device name + * @ino - i-node number of the mapped file + * @bytes_done - number of bytes transferred + * @sector - beginning sector for the entire bio + * @flags - see below + * BIO_UPTODATE 0 ok after I/O completion + * BIO_RW_BLOCK 1 RW_AHEAD set, and read/write would block + * BIO_EOF 2 out-out-bounds error + * BIO_SEG_VALID 3 nr_hw_seg valid + * BIO_CLONED 4 doesn't own data + * BIO_BOUNCED 5 bio is a bounce bio + * BIO_USER_MAPPED 6 contains user pages + * BIO_EOPNOTSUPP 7 not supported + + * @error - 0 on success + * @rw - binary trace for read/write request + * @vcnt - bio vector count which represents number of array element (page, offset, length) which makes up this I/O request + * @idx - offset into the bio vector array + * @phys_segments - number of segments in this bio after physical address coalescing is performed. + * @size - total size in bytes + * + * Context: + * The process signals the transfer is done. 
+ */ +probe ioblock_trace.end = kernel.trace("block_bio_complete") +{ + q = $q + devname = __bio_devname($bio) + ino = __bio_ino($bio) + + bytes_done = $bio->bi_size + error = $error + + sector = $bio->bi_sector + flags = $bio->bi_flags + rw = $bio->bi_rw + vcnt = $bio->bi_vcnt + idx = $bio->bi_idx + phys_segments = $bio->bi_phys_segments + size = $bio->bi_size +} diff --git a/tapset/ioscheduler.stp b/tapset/ioscheduler.stp index 637e27836..ac271f803 100644 --- a/tapset/ioscheduler.stp +++ b/tapset/ioscheduler.stp @@ -68,22 +68,24 @@ probe ioscheduler.elv_next_request.return } /** - * probe ioscheduler.elv_add_request - A request was added to the request queue + * probe ioscheduler.elv_add_request.kp - kprobe based probe to indicate that a request was added to the request queue * @elevator_name: The type of I/O elevator currently enabled + * @q: pointer to request queue * @req: Address of the request * @req_flags: Request flags * @disk_major: Disk major number of the request * @disk_minor: Disk minor number of the request */ // when a request is added to the request queue -probe ioscheduler.elv_add_request - = kernel.function("__elv_add_request") +probe ioscheduler.elv_add_request.kp + = kernel.function("elv_insert") { %( kernel_v >= "2.6.10" %? elevator_name = kernel_string($q->elevator->elevator_type->elevator_name) %: elevator_name = kernel_string($q->elevator->elevator_name) %) + q = $q if($rq == 0) { disk_major = -1 disk_minor = -1 @@ -142,6 +144,189 @@ probe ioscheduler.elv_completed_request %) } +/** + * probe ioscheduler.elv_add_request.tp : tracepoint based probe to indicate a request is added to the request queue. + * @elevator_name : The type of I/O elevator currently enabled. + * @q : Pointer to request queue. + * @rq : Address of request. + * @rq_flags : Request flags. + * @disk_major : Disk major no of request. + * @disk_minor : Disk minor number of request. 
+ * + */ +probe ioscheduler.elv_add_request.tp + = kernel.trace("block_rq_insert") +{ +q = $q +elevator_name = kernel_string($q->elevator->elevator_type->elevator_name) +rq = $rq + +if ($rq == 0 || $rq->rq_disk ==0) { + disk_major = -1 + disk_minor = -1 +} else { + disk_major = $rq->rq_disk->major + disk_minor = $rq->rq_disk->first_minor +} + +rq_flags = $rq==0? 0:$rq->cmd_flags +} + +/** + * probe ioscheduler.elv_add_request : probe to indicate request is added to the request queue. + * @elevator_name : The type of I/O elevator currently enabled. + * @q : Pointer to request queue. + * @rq : Address of request. + * @rq_flags : Request flags. + * @disk_major : Disk major no of request. + * @disk_minor : Disk minor number of request. + * + */ +probe ioscheduler.elv_add_request = + ioscheduler.elv_add_request.tp !, ioscheduler.elv_add_request.kp +{} + +/** + * probe ioscheduler_trace.elv_completed_request : Fires when a request is + * completed. + * @elevator_name : The type of I/O elevator currently enabled. + * @rq : Address of request. + * @rq_flags : Request flags. + * @disk_major : Disk major no of request. + * @disk_minor : Disk minor number of request. + * + */ +probe ioscheduler_trace.elv_completed_request + = kernel.trace("block_rq_complete") +{ +elevator_name = kernel_string($q->elevator->elevator_type->elevator_name) +rq = $rq + +if ($rq == 0 || $rq->rq_disk ==0) { + disk_major = -1 + disk_minor = -1 +} else { + disk_major = $rq->rq_disk->major + disk_minor = $rq->rq_disk->first_minor +} + +rq_flags = $rq==0? 0:$rq->cmd_flags +} + +/** + * probe ioscheduler_trace.elv_issue_request : Fires when a request is + * scheduled. + * @elevator_name : The type of I/O elevator currently enabled. + * @rq : Address of request. + * @rq_flags : Request flags. + * @disk_major : Disk major no of request. + * @disk_minor : Disk minor number of request. 
+ * + */ +probe ioscheduler_trace.elv_issue_request + = kernel.trace("block_rq_issue") +{ +elevator_name = kernel_string($q->elevator->elevator_type->elevator_name) +rq = $rq + +if ($rq == 0 || $rq->rq_disk ==0) { + disk_major = -1 + disk_minor = -1 +} else { + disk_major = $rq->rq_disk->major + disk_minor = $rq->rq_disk->first_minor +} + +rq_flags = $rq==0? 0:$rq->cmd_flags +} + +/** + * probe ioscheduler_trace.elv_requeue_request : Fires when a request is + * put back on the queue, when the hardware cannot accept more requests. + * @elevator_name : The type of I/O elevator currently enabled. + * @rq : Address of request. + * @rq_flags : Request flags. + * @disk_major : Disk major no of request. + * @disk_minor : Disk minor number of request. + * + */ +probe ioscheduler_trace.elv_requeue_request + = kernel.trace("block_rq_requeue") +{ +elevator_name = kernel_string($q->elevator->elevator_type->elevator_name) +rq = $rq + +if ($rq == 0 || $rq->rq_disk ==0) { + disk_major = -1 + disk_minor = -1 +} else { + disk_major = $rq->rq_disk->major + disk_minor = $rq->rq_disk->first_minor +} + +rq_flags = $rq==0? 0:$rq->cmd_flags +} + +/** + * probe ioscheduler_trace.elv_abort_request : Fires when a request is aborted. + * @elevator_name : The type of I/O elevator currently enabled. + * @rq : Address of request. + * @rq_flags : Request flags. + * @disk_major : Disk major no of request. + * @disk_minor : Disk minor number of request. + * + */ +probe ioscheduler_trace.elv_abort_request + = kernel.trace("block_rq_abort") +{ +elevator_name = kernel_string($q->elevator->elevator_type->elevator_name) +rq = $rq + +if ($rq == 0 || $rq->rq_disk ==0) { + disk_major = -1 + disk_minor = -1 +} else { + disk_major = $rq->rq_disk->major + disk_minor = $rq->rq_disk->first_minor +} + +rq_flags = $rq==0? 0:$rq->cmd_flags +} + +/** + * probe ioscheduler_trace.plug - Fires when a request queue is plugged; + * ie, requests in the queue cannot be serviced by block driver. 
+ * @rq_queue : request queue + * + */ +probe ioscheduler_trace.plug = kernel.trace("block_plug") +{ + rq_queue = $q +} + +/** + * probe ioscheduler_trace.unplug_io - Fires when a request queue is unplugged; + * Either, when number of pending requests in the queue exceeds threshold + * or, upon expiration of timer that was activated when queue was plugged. + * @rq_queue : request queue + * + */ +probe ioscheduler_trace.unplug_io = kernel.trace("block_unplug_io") +{ + rq_queue = $q +} + +/** + * probe ioscheduler_trace.unplug_timer - Fires when unplug timer associated + * with a request queue expires. + * @rq_queue : request queue + * + */ +probe ioscheduler_trace.unplug_timer = kernel.trace("block_unplug_timer") +{ + rq_queue = $q +} + function disk_major_from_request:long(var_q:long) %{ /* pure */ struct request_queue *q = (struct request_queue *)((long)THIS->var_q); diff --git a/testsuite/buildok/ioblock_test.stp b/testsuite/buildok/ioblock_test.stp index 4d3dadfa6..552379943 100755 --- a/testsuite/buildok/ioblock_test.stp +++ b/testsuite/buildok/ioblock_test.stp @@ -25,3 +25,10 @@ probe ioblock.end { devname, sector, flags, rw, bio_rw_str(rw), vcnt, idx, phys_segments, size, bytes_done, error, ino) %) } + +probe ioblock_trace.* +{ + log(pp()) + printf("%s\t%p\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", + devname, q, sector, flags, rw, bio_rw_str(rw), vcnt, idx, phys_segments, size, bytes_done, error, ino) +} diff --git a/testsuite/buildok/ioscheduler.stp b/testsuite/buildok/ioscheduler.stp index 2d88d2d5f..55ef9a0f1 100755 --- a/testsuite/buildok/ioscheduler.stp +++ b/testsuite/buildok/ioscheduler.stp @@ -7,3 +7,16 @@ probe ioscheduler.* printf("ppname: %s, elv_name: %s, %d, %d", probefunc(), elevator_name, disk_major, disk_minor) } + +probe ioscheduler_trace.elv* +{ + printf("ppname: %s, request %p, elv_name: %s, %d, %d", probefunc(), + rq, elevator_name, disk_major, disk_minor) +} + +probe ioscheduler_trace.plug, ioscheduler_trace.unplug_io, 
ioscheduler_trace.unplug_timer +{ + printf("ppname: %s, request queue %p", probefunc(), + rq_queue) /* plug/unplug aliases export only rq_queue */ +} + -- 2.43.5