#!/usr/bin/ksh # # iotop - display top disk I/O events by process. # Written using DTrace (Solaris 10 3/05). # # This is measuring disk events that have made it past system caches. # # 20-Apr-2006, ver 0.76 # # USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename] # [-m mount_point] [-t top] [interval [count]] # # iotop # default output, 5 second intervals # # -C # don't clear the screen # -D # print delta times, elapsed, us # -j # print project ID # -o # print disk delta times, us # -P # print %I/O (disk delta times) # -Z # print zone ID # -d device # instance name to snoop (eg, dad0) # -f filename # full pathname of file to snoop # -m mount_point # this FS only (will skip raw events) # -t top # print top number only # eg, # iotop 1 # 1 second samples # iotop -C # don't clear the screen # iotop -P # print %I/O (time based) # iotop -j # print project IDs # iotop -Z # print zone IDs # iotop -t 20 # print top 20 lines only # iotop -C 5 12 # print 12 x 5 second samples # # FIELDS: # UID user ID # PID process ID # PPID parent process ID # PROJ project ID # ZONE zone ID # CMD process command name # DEVICE device name # MAJ device major number # MIN device minor number # D direction, Read or Write # BYTES total size of operations, bytes # ELAPSED total elapsed from request to completion, us # DISKTIME total time for disk to complete request, us # %I/O percent disk I/O, based on time (DISKTIME) # load 1 min load average # disk_r total disk read Kbytes for sample # disk_w total disk write Kbytes for sample # # NOTE: # * There are two different delta times reported. -D prints the # elapsed time from the disk request (strategy) to the disk completion # (iodone); -o prints the time for the disk to complete that event # since it's last event (time between iodones), or, the time to the # strategy if the disk had been idle. # * The %I/O value can exceed 100%. It represents how busy a process is # making the disks, in terms of a single disk. A value of 200% could # mean 2 disks are busy at 100%, or 4 disks at 50%... # # SEE ALSO: iosnoop # BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace # Solaris Dynamic Tracing Guide, http://docs.sun.com # DTrace Tools, http://www.brendangregg.com/dtrace.html # # INSPIRATION: top(1) by William LeFebvre # # COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg. # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License, Version 1.0 only # (the "License"). You may not use this file except in compliance # with the License. # # You can obtain a copy of the license at Docs/cddl1.txt # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # CDDL HEADER END # # KNOWN BUGS: # - This can print errors while running on servers with Veritas volumes. # # Author: Brendan Gregg [Sydney, Australia] # # 15-Jul-2005 Brendan Gregg Created this. # ############################## # --- Process Arguments --- # ### default variables opt_device=0; opt_file=0; opt_mount=0; opt_clear=1; opt_proj=0; opt_zone=0 opt_percent=0; opt_def=1; opt_bytes=1; filter=0; device=.; filename=.; mount=. opt_top=0; opt_elapsed=0; opt_dtime=0; interval=5; count=-1; top=0 ### process options while getopts CDd:f:hjm:oPt:Z name do case $name in C) opt_clear=0 ;; D) opt_elapsed=1; opt_bytes=0 ;; d) opt_device=1; device=$OPTARG ;; f) opt_file=1; filename=$OPTARG ;; j) opt_proj=1; opt_def=0 ;; m) opt_mount=1; mount=$OPTARG ;; o) opt_dtime=1; opt_bytes=0 ;; P) opt_percent=1; opt_dtime=1; opt_bytes=0 ;; t) opt_top=1; top=$OPTARG ;; Z) opt_zone=1; opt_def=0 ;; h|?) cat <<-END >&2 USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename] [-m mount_point] [-t top] [interval [count]] -C # don't clear the screen -D # print delta times, elapsed, us -j # print project ID -o # print disk delta times, us -P # print %I/O (disk delta times) -Z # print zone ID -d device # instance name to snoop -f filename # snoop this file only -m mount_point # this FS only -t top # print top number only eg, iotop # default output, 5 second samples iotop 1 # 1 second samples iotop -P # print %I/O (time based) iotop -m / # snoop events on filesystem / only iotop -t 20 # print top 20 lines only iotop -C 5 12 # print 12 x 5 second samples END exit 1 esac done shift $(( $OPTIND - 1 )) ### option logic if [[ "$1" > 0 ]]; then interval=$1; shift fi if [[ "$1" > 0 ]]; then count=$1; shift fi if (( opt_proj && opt_zone )); then opt_proj=0 fi if (( opt_elapsed && opt_dtime )); then opt_elapsed=0 fi if (( opt_device || opt_mount || opt_file )); then filter=1 fi if (( opt_clear )); then clearstr=`clear` else clearstr=. fi ################################# # --- Main Program, DTrace --- # /usr/sbin/dtrace -n ' /* * Command line arguments */ inline int OPT_def = '$opt_def'; inline int OPT_proj = '$opt_proj'; inline int OPT_zone = '$opt_zone'; inline int OPT_clear = '$opt_clear'; inline int OPT_bytes = '$opt_bytes'; inline int OPT_elapsed = '$opt_elapsed'; inline int OPT_dtime = '$opt_dtime'; inline int OPT_percent = '$opt_percent'; inline int OPT_device = '$opt_device'; inline int OPT_mount = '$opt_mount'; inline int OPT_file = '$opt_file'; inline int OPT_top = '$opt_top'; inline int INTERVAL = '$interval'; inline int COUNTER = '$count'; inline int FILTER = '$filter'; inline int TOP = '$top'; inline string DEVICE = "'$device'"; inline string FILENAME = "'$filename'"; inline string MOUNT = "'$mount'"; inline string CLEAR = "'$clearstr'"; #pragma D option quiet /* * Print header */ dtrace:::BEGIN { last_event[""] = 0; /* starting values */ counts = COUNTER; secs = INTERVAL; disk_r = 0; disk_w = 0; printf("Tracing... Please wait.\n"); } /* * Check event is being traced */ io:genunix::start, io:genunix::done { /* default is to trace unless filtering, */ this->ok = FILTER ? 0 : 1; /* check each filter, */ (OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1; (OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1; (OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? this->ok = 1 : 1; } /* * Reset last_event for disk idle -> start * this prevents idle time being counted as disk time. */ io:genunix::start /! pending[args[1]->dev_statname]/ { /* save last disk event */ last_event[args[1]->dev_statname] = timestamp; } /* * Store entry details */ io:genunix::start /this->ok/ { /* these are used as a unique disk event key, */ this->dev = args[0]->b_edev; this->blk = args[0]->b_blkno; /* save disk event details, */ start_uid[this->dev, this->blk] = uid; start_pid[this->dev, this->blk] = pid; start_ppid[this->dev, this->blk] = ppid; start_comm[this->dev, this->blk] = execname; start_time[this->dev, this->blk] = timestamp; start_proj[this->dev, this->blk] = curpsinfo->pr_projid; start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid; start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W"; disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0; disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount; /* increase disk event pending count */ pending[args[1]->dev_statname]++; } /* * Process and Print completion */ io:genunix::done /this->ok/ { /* decrease disk event pending count */ pending[args[1]->dev_statname]--; /* * Process details */ /* fetch entry values */ this->dev = args[0]->b_edev; this->blk = args[0]->b_blkno; this->suid = start_uid[this->dev, this->blk]; this->spid = start_pid[this->dev, this->blk]; this->sppid = start_ppid[this->dev, this->blk]; this->sproj = start_proj[this->dev, this->blk]; this->szone = start_zone[this->dev, this->blk]; self->scomm = start_comm[this->dev, this->blk]; this->stime = start_time[this->dev, this->blk]; this->etime = timestamp; /* endtime */ this->elapsed = this->etime - this->stime; self->rw = start_rw[this->dev, this->blk]; this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 : timestamp - last_event[args[1]->dev_statname]; /* memory cleanup */ start_uid[this->dev, this->blk] = 0; start_pid[this->dev, this->blk] = 0; start_ppid[this->dev, this->blk] = 0; start_time[this->dev, this->blk] = 0; start_comm[this->dev, this->blk] = 0; start_zone[this->dev, this->blk] = 0; start_proj[this->dev, this->blk] = 0; start_rw[this->dev, this->blk] = 0; /* * Choose statistic to track */ OPT_bytes ? this->value = args[0]->b_bcount : 1; OPT_elapsed ? this->value = this->elapsed / 1000 : 1; OPT_dtime ? this->value = this->dtime / 1000 : 1; /* * Save details */ OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm, args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor, self->rw] = sum(this->value) : 1; OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm, args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor, self->rw] = sum(this->value) : 1; OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm, args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor, self->rw] = sum(this->value) : 1; /* save last disk event */ last_event[args[1]->dev_statname] = timestamp; self->scomm = 0; self->rw = 0; } /* * Prevent pending from underflowing * this can happen if this program is started during disk events. */ io:genunix::done /pending[args[1]->dev_statname] < 0/ { pending[args[1]->dev_statname] = 0; } /* * Timer */ profile:::tick-1sec { secs--; } /* * Print Report */ profile:::tick-1sec /secs == 0/ { /* fetch 1 min load average */ this->load1a = `hp_avenrun[0] / 65536; this->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536; /* convert counters to Kbytes */ disk_r /= 1024; disk_w /= 1024; /* print status */ OPT_clear ? printf("%s", CLEAR) : 1; printf("%Y, load: %d.%02d, disk_r: %6d KB, disk_w: %6d KB\n\n", walltimestamp, this->load1a, this->load1b, disk_r, disk_w); /* print headers */ OPT_def ? printf(" UID ") : 1; OPT_proj ? printf(" PROJ ") : 1; OPT_zone ? printf(" ZONE ") : 1; printf("%6s %6s %-16s %-7s %3s %3s %1s", "PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D"); OPT_bytes ? printf(" %16s\n", "BYTES") : 1; OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1; OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1; OPT_dtime && OPT_percent ? printf(" %6s\n", "%I/O") : 1; /* truncate to top lines if needed */ OPT_top ? trunc(@out, TOP) : 1; /* normalise to percentage if needed */ OPT_percent ? normalize(@out, INTERVAL * 10000) : 1; /* print data */ ! OPT_percent ? printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) : printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out); printf("\n"); /* clear data */ trunc(@out); disk_r = 0; disk_w = 0; secs = INTERVAL; counts--; } /* * End of program */ profile:::tick-1sec /counts == 0/ { exit(0); } /* * Cleanup for Ctrl-C */ dtrace:::END { trunc(@out); } '