Random Project

Many fixes, works again.

Hi,

I had to do a few fixes and some (minor) clearing up compared to the 0.0.4 version posted here.

The plugin works again now. As for SELinux, I will find out once I have created an RPM for our environment and done a test rollout 🙂

Regards,
Frederic

———-check_iostat.sh———–
#!/bin/bash
#
# Version 0.0.2 – Jan/2009
# Changes: added device verification
#
# by Thiago Varela – [email protected]
#
# ————————————–
#
# Version 0.0.3 – Dec/2011
# Changes:
# – changed values from bytes to mbytes
# – fixed bug to get traffic data without comma but point
# – current values are displayed now, not average values (first run of iostat)
#
# by Philipp Niedziela – [email protected]
#
# Version 0.0.4 – April/2014
# Changes:
# – Allow Empty warn/crit levels
# – Can check I/O, WAIT Time, or Queue
#
# by Warren Turner
#
# Version 0.0.5 – Jun/2014
# Changes:
# – removed -y flag from call since iostat doesn’t know about it any more (June 2014)
# – only needed executions of iostat are done now (save cpu time whenever you can)
# – fixed the obvious problems of missing input values (probably because of the now unimplemented “-y”) with -x values
# – made performance data optional (I like to have choice in the matter)
#
# by Frederic Krueger / [email protected]
#

# Locate the external tools this plugin depends on. `command -v` is a shell
# builtin, so the lookup does not itself rely on `which` being installed.
# Each variable is left empty when the tool cannot be found on PATH.
iostat=$(command -v iostat 2>/dev/null)
bc=$(command -v bc 2>/dev/null)

# Print the usage text on stdout and terminate the plugin.
# Never returns: exits with status 3, the "UNKNOWN" state of the Nagios plugin
# return-code convention (the previous `exit -1` is outside the 0-3 range).
# The text is a quoted here-document so nothing inside it is expanded.
function help {
  cat <<'EOF'

Usage:

-d <device> = Device (default: sda)
--Device to be checked. Example: "-d sda"

-w <levels> = Warning levels, -c <levels> = Critical levels
--Comma separated list; the fields depend on the mode (see warning/critical below)

-i = IO Check Mode
--Checks Total Disk IO, Read IO/Sec, Write IO/Sec, Bytes Read/Sec, Bytes Written/Sec
--warning/critical = Total IO,Read IO/Sec,Write IO/Sec,Bytes Read/Sec,Bytes Written/Sec

-q = Queue Mode
--Checks Disk Queue Lengths
--warning/critical = Total Queue Length,Read Queue Length,Write Queue Length

-W = Wait Time Mode
--Check the time for I/O requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.
--warning/critical = Avg I/O Wait Time/ms,Read Wait Time/ms,Write Wait Time/ms

-p = Provide performance data for later graphing

-h = This help

EOF
  exit 3
}

# Ensuring we have the needed tools:
# $iostat and $bc hold the paths found by the lookup at the top of the script
# and are empty when a tool is missing. The test is quoted so an empty value
# is detected, and it runs in the current shell (not a subshell) so that the
# exit really terminates the plugin. Status 3 is Nagios "UNKNOWN".
if [[ ! -x "$iostat" || ! -x "$bc" ]]; then
  printf 'ERROR: You must have iostat and bc installed in order to run this plugin\n\tuse: apt-get install sysstat bc\n'
  exit 3
fi

# Mode switches (0 = off, 1 = on) and result variables, with their defaults.
io=0
queue=0
waittime=0
printperfdata=0
STATE="OK"
status=0

MSG=""
PERFDATA=""

# Getting parameters:
# Parse the given command-line options into the global settings.
# Arguments: the plugin's command-line arguments.
# Globals written: disk, warning, critical, io, printperfdata, queue, waittime.
# -h calls help, which is defined elsewhere in this script.
parse_args() {
  # A local OPTIND makes getopts start from the first argument on every call.
  local OPT OPTARG OPTIND=1
  while getopts "d:w:c:io:pqu:Wt:h" OPT; do
    case "$OPT" in
      d) disk=$OPTARG ;;
      w) warning=$OPTARG ;;
      c) critical=$OPTARG ;;
      i) io=1 ;;
      p) printperfdata=1 ;;
      q) queue=1 ;;
      W) waittime=1 ;;
      h) help ;;
      # -o/-u/-t are accepted by the option string but have no effect;
      # unknown options are reported by getopts itself and otherwise ignored.
      *) ;;
    esac
  done
}

parse_args "$@"

# Autofill if parameters are empty.
# 99999 is the value the check sections of this script compare against to
# decide that no threshold was configured.
disk=${disk:-sda}
warning=${warning:-99999}
critical=${critical:-99999}

# Print field number $2 of the comma-separated threshold list $1.
# cut prints the whole line for any field number when the line contains no
# delimiter, so a single value applies to every field; a field that is missing
# from a longer list comes back empty.
threshold_field() {
  printf '%s\n' "$1" | cut -d, -f"$2"
}

# Adjusting the warn and crit levels:
crit_total=$(threshold_field "$critical" 1)
crit_read=$(threshold_field "$critical" 2)
crit_written=$(threshold_field "$critical" 3)
crit_kbytes_read=$(threshold_field "$critical" 4)
crit_kbytes_written=$(threshold_field "$critical" 5)

warn_total=$(threshold_field "$warning" 1)
warn_read=$(threshold_field "$warning" 2)
warn_written=$(threshold_field "$warning" 3)
warn_kbytes_read=$(threshold_field "$warning" 4)
warn_kbytes_written=$(threshold_field "$warning" 5)

## # Checking parameters:
# [ ! -b “/dev/$disk” ] && echo “ERROR: Device incorrectly specified” && help

# ( [ “$warn_total” == “” ] || [ “$warn_read” == “” ] || [ “$warn_written” == “” ] ||
# [ “$crit_total” == “” ] || [ “$crit_read” == “” ] || [ “$crit_written” == “” ] ) &&
# echo “ERROR: You must specify all warning and critical levels” && help

# ( [[ “$warn_total” -ge “$crit_total” ]] ||
# [[ “$warn_read” -ge “$crit_read” ]] ||
# [[ “$warn_written” -ge “$crit_written” ]] ) &&
# echo “ERROR: critical levels must be higher than warning levels” && help

# iostat parameters:
# -m: megabytes
# -k: kilobytes
# The first report iostat prints covers the time since the last reboot; only a
# second report shows the current values of the disk. We therefore request two
# reports two seconds apart and keep the last line for the device.

# Doing the actual checks:

# Run iostat with the given options for $disk and print the device's line from
# the final report. The device column is compared exactly so that unrelated
# lines which merely contain the name are not picked up. Prints an empty line
# when the device does not appear in the output.
iostat_sample() {
  "$iostat" "$@" "$disk" 2 2 |
    awk -v dev="$disk" '$1 == dev { line = $0 } END { print line }'
}

# -d has the total per second, -x the rest
# Only the samples needed by the selected modes are taken.
TMPD=""
TMPX=""
if [ "$io" == "1" ]; then
  TMPD=$(iostat_sample -k -d)
fi
if [ "$io" == "1" ] || [ "$queue" == "1" ]; then
  TMPX=$(iostat_sample -x -d)
fi

## IO Check ##

# Evaluate the I/O figures against the warning/critical levels.
# Arguments:
#   $1 - the device line taken from `iostat -k -d`
#   $2 - the device line taken from `iostat -x -d`
# Globals read:    warn_total warn_read warn_written warn_kbytes_read
#                  warn_kbytes_written and the matching crit_* variables
# Globals written: STATE, status (only when a level is hit or no levels are
#                  configured), MSG, PERFDATA
# NOTE(review): the column numbers are kept from the original script; which
# statistic sits in which column depends on the installed iostat version, and
# columns 6/7 of the plain -d report look suspicious - confirm.
check_io() {
  local -a plain extended
  read -r -a plain <<<"$1"
  read -r -a extended <<<"$2"

  # Array indices are 0-based, so index N-1 is awk's column N.
  local total=${plain[1]:-}
  local read_sec=${extended[3]:-}
  local written_sec=${extended[4]:-}
  local kbytes_read_sec=${plain[5]:-}
  local kbytes_written_sec=${plain[6]:-}

  # IO # "Converting" values to float (string replace , with .)
  total=${total/,/.}
  read_sec=${read_sec/,/.}
  written_sec=${written_sec/,/.}
  kbytes_read_sec=${kbytes_read_sec/,/.}
  kbytes_written_sec=${kbytes_written_sec/,/.}

  # awk program taking VALUE LIMIT pairs as arguments; it succeeds when any
  # value is numerically >= its limit. Pairs with an empty side are skipped,
  # so a level that was not supplied can never trigger.
  local any_reached='BEGIN {
    for (i = 1; i + 1 < ARGC; i += 2)
      if (ARGV[i] != "" && ARGV[i + 1] != "" && ARGV[i] + 0 >= ARGV[i + 1] + 0)
        exit 0
    exit 1
  }'

  # IO # Comparing the result and setting the correct level:
  # The sentinel is compared as a string so decimal levels are accepted too.
  if [ "$warn_total" != "99999" ] && awk "$any_reached" \
    "$total" "$warn_total" \
    "$read_sec" "$warn_read" \
    "$written_sec" "$warn_written" \
    "$kbytes_read_sec" "$warn_kbytes_read" \
    "$kbytes_written_sec" "$warn_kbytes_written"; then
    STATE="WARNING"
    status=1
  fi

  if [ "$crit_total" != "99999" ] && awk "$any_reached" \
    "$total" "$crit_total" \
    "$read_sec" "$crit_read" \
    "$written_sec" "$crit_written" \
    "$kbytes_read_sec" "$crit_kbytes_read" \
    "$kbytes_written_sec" "$crit_kbytes_written"; then
    STATE="CRITICAL"
    status=2
  fi

  if [ "$crit_total" == "99999" ] && [ "$warn_total" == "99999" ]; then
    STATE="OK"
    status=0
  fi

  # IO # Printing the results:
  MSG="$STATE - I/O stats: Total IO/Sec=$total Read IO/Sec=$read_sec Write IO/Sec=$written_sec KBytes Read/Sec=$kbytes_read_sec KBytes_Written/Sec=$kbytes_written_sec"
  PERFDATA=" | total_io_sec=$total; read_io_sec=$read_sec; write_io_sec=$written_sec; kbytes_read_sec=$kbytes_read_sec; kbytes_written_sec=$kbytes_written_sec;"
}

if [ "$io" == "1" ]; then
  check_io "$TMPD" "$TMPX"
fi

## QUEUE Check ##

# Evaluate the queue figures against the warning/critical levels.
# Arguments:
#   $1 - the device line taken from `iostat -x -d`
# Globals read:    warn_total warn_read warn_written crit_total crit_read
#                  crit_written
# Globals written: STATE, status (only when a level is hit or no levels are
#                  configured), MSG, PERFDATA
# NOTE(review): the column numbers are kept from the original script; which
# statistic sits in which column depends on the installed iostat version -
# confirm.
check_queue() {
  local -a extended
  read -r -a extended <<<"$1"

  # Array indices are 0-based, so index N-1 is awk's column N.
  local total=${extended[7]:-}
  local readq_sec=${extended[5]:-}
  local writtenq_sec=${extended[6]:-}

  # QUEUE # "Converting" values to float (string replace , with .)
  total=${total/,/.}
  readq_sec=${readq_sec/,/.}
  writtenq_sec=${writtenq_sec/,/.}

  # awk program taking VALUE LIMIT pairs as arguments; it succeeds when any
  # value is numerically >= its limit. Pairs with an empty side are skipped,
  # so a level that was not supplied can never trigger.
  local any_reached='BEGIN {
    for (i = 1; i + 1 < ARGC; i += 2)
      if (ARGV[i] != "" && ARGV[i + 1] != "" && ARGV[i] + 0 >= ARGV[i + 1] + 0)
        exit 0
    exit 1
  }'

  # QUEUE # Comparing the result and setting the correct level:
  # The sentinel is compared as a string so decimal levels are accepted too.
  if [ "$warn_total" != "99999" ] && awk "$any_reached" \
    "$total" "$warn_total" \
    "$readq_sec" "$warn_read" \
    "$writtenq_sec" "$warn_written"; then
    STATE="WARNING"
    status=1
  fi

  if [ "$crit_total" != "99999" ] && awk "$any_reached" \
    "$total" "$crit_total" \
    "$readq_sec" "$crit_read" \
    "$writtenq_sec" "$crit_written"; then
    STATE="CRITICAL"
    status=2
  fi

  if [ "$crit_total" == "99999" ] && [ "$warn_total" == "99999" ]; then
    STATE="OK"
    status=0
  fi

  # QUEUE # Printing the results:
  MSG="$STATE - Disk Queue Stats: Average Queue Length=$total Read Queue/Sec=$readq_sec Write Queue/Sec=$writtenq_sec"
  PERFDATA=" | total=$total; read_queue_sec=$readq_sec; write_queue_sec=$writtenq_sec;"
}

if [ "$queue" == "1" ]; then
  check_queue "$TMPX"
fi

## WAIT TIME Check ##

# Evaluate the wait-time figures against the warning/critical levels.
# Arguments:
#   $1 - the device line taken from `iostat -x -k -d`
# Globals read:    warn_total warn_read warn_written crit_total crit_read
#                  crit_written
# Globals written: STATE, status (only when a level is hit or no levels are
#                  configured), MSG, PERFDATA
# NOTE(review): the column numbers and the labels in the message are kept from
# the original script; which statistic sits in which column depends on the
# installed iostat version - confirm.
check_waittime() {
  local -a extended
  read -r -a extended <<<"$1"

  # Array indices are 0-based, so index N-1 is awk's column N.
  local avgiotime=${extended[9]:-}
  local avgsvctime=${extended[10]:-}
  local avgcpuutil=${extended[11]:-}

  # WAIT TIME # "Converting" values to float (string replace , with .)
  avgiotime=${avgiotime/,/.}
  avgsvctime=${avgsvctime/,/.}
  avgcpuutil=${avgcpuutil/,/.}

  # awk program taking VALUE LIMIT pairs as arguments; it succeeds when any
  # value is numerically >= its limit. Pairs with an empty side are skipped,
  # so a level that was not supplied can never trigger.
  local any_reached='BEGIN {
    for (i = 1; i + 1 < ARGC; i += 2)
      if (ARGV[i] != "" && ARGV[i + 1] != "" && ARGV[i] + 0 >= ARGV[i + 1] + 0)
        exit 0
    exit 1
  }'

  # WAIT TIME # Comparing the result and setting the correct level:
  # The sentinel is compared as a string so decimal levels are accepted too.
  if [ "$warn_total" != "99999" ] && awk "$any_reached" \
    "$avgiotime" "$warn_total" \
    "$avgsvctime" "$warn_read" \
    "$avgcpuutil" "$warn_written"; then
    STATE="WARNING"
    status=1
  fi

  if [ "$crit_total" != "99999" ] && awk "$any_reached" \
    "$avgiotime" "$crit_total" \
    "$avgsvctime" "$crit_read" \
    "$avgcpuutil" "$crit_written"; then
    STATE="CRITICAL"
    status=2
  fi

  if [ "$crit_total" == "99999" ] && [ "$warn_total" == "99999" ]; then
    STATE="OK"
    status=0
  fi

  # WAIT TIME # Printing the results:
  MSG="$STATE - Wait Time Stats: Avg I/O Wait Time/ms=$avgiotime Avg Service Wait Time/ms=$avgsvctime Avg CPU Utilization=$avgcpuutil"
  PERFDATA=" | avg_io_waittime_ms=$avgiotime; avg_service_waittime_ms=$avgsvctime; avg_cpu_utilization=$avgcpuutil;"
}

if [ "$waittime" == "1" ]; then
  # Two reports are requested because the first one iostat prints covers the
  # time since boot; the device's line from the last report is kept. The
  # device column is compared exactly rather than pattern-matched.
  TMP=$("$iostat" -x -k -d "$disk" 2 2 |
    awk -v dev="$disk" '$1 == dev { line = $0 } END { print line }')
  check_waittime "$TMP"
fi

# now output the official result
# The message and the optional performance data go on one line. printf is
# used so the text is emitted verbatim, whatever it starts with.
printf '%s' "$MSG"
if [ "$printperfdata" == "1" ]; then
  printf '%s' "$PERFDATA"
fi
printf '\n'
# status is only assigned when a check sets it; treat "never set" as 0.
exit "${status:-0}"
———-/check_iostat.sh———–