#!/bin/ksh
# Nagios Performance
#
# returns the average execution time and check latency for graphing
# by G. Stangl 18 02 2007
#

# Name of this script without its directory part; shown in the usage
# and version output.
PROGNAME=${0##*/}
# Directory holding this script, used to locate utils.sh next to it.
# When the script was started without any directory component (for
# example "ksh check_script"), fall back to the current directory
# instead of mistaking the script name itself for a directory.
case "$0" in
    */*) PROGPATH=${0%/*} ;;
    *)   PROGPATH=. ;;
esac
REVISION="1.0"

# Shared plugin helpers. This script calls print_revision and uses the
# STATE_* exit codes without defining them, so they are expected to
# come from this file.
. "$PROGPATH/utils.sh"

# ---------------------------------------------------------------
# Built-in thresholds in seconds. The -E and -L options replace the
# execution time and latency pairs respectively.
# ---------------------------------------------------------------
ExeW=10 ExeC=30     # service check execution time: warning, critical
LatW=30 LatC=60     # service check latency:        warning, critical
# Debug messages are printed only while this is "y" (set by -d).
Debug=no

# Print the command-line synopsis and a one-line description of every
# option to stdout. Reads the global PROGNAME for the program name.
# -E and -L appear in brackets because both are optional: the script's
# default thresholds apply when they are omitted.
print_usage() {
    cat <<EOF
Usage: $PROGNAME [-E warn,crit] [-L warn,crit] [-d]
       $PROGNAME -h
       $PROGNAME -V

 -d                  Debug Mode on
 -E warn,crit        Service Check Execution Time Warning and Critical threshold [sec]
 -L warn,crit        Service Check Latency Warning and Critical threshold [sec]

EOF
}

# Print the full help text to stdout: the revision line, a blank line,
# the usage text, and a one-line description of the plugin framed by
# blank lines. print_revision is not defined in this file; presumably
# it is supplied by the sourced utils.sh.
print_help() {
    print_revision $PROGNAME $REVISION
    printf '\n'
    print_usage
    printf '\n%s\n\n' "Nagios Key Performance data plugin for Nagios"
}

# Succeed when $1 is a plain non-negative decimal number such as 5 or
# 0.25; fail for an empty string, a sign, any other character, or a
# leading, trailing or repeated decimal point.
is_number() {
    case "$1" in
        ''|*[!0-9.]*|*.*.*|.*|*.) return 1 ;;
    esac
    return 0
}

# Validate the thresholds given to option $1: $2 is the warning value
# and $3 the critical value. Unless both are numbers and warning is
# below critical, print a message plus the usage text and exit with
# STATE_UNKNOWN.
check_thresholds() {
    if is_number "$2" && is_number "$3"; then
        :
    else
        echo "Option $1 needs numeric thresholds in the form warn,crit"
        print_usage
        exit $STATE_UNKNOWN
    fi
    # awk does the comparison so fractional seconds work in any shell.
    if awk -v w="$2" -v c="$3" 'BEGIN { exit !(w + 0 >= c + 0) }'; then
        echo "Crit must be greater than Warn threshold"
        print_usage
        exit $STATE_UNKNOWN
    fi
}

# Process the command-line options passed as arguments.
#   -h            print the help text and exit with STATE_OK
#   -V            print the revision and exit with STATE_OK
#   -d            set Debug=y
#   -E warn,crit  set ExeW and ExeC
#   -L warn,crit  set LatW and LatC
# Anything else, including an empty argument, is reported as unknown and
# ends the script with STATE_UNKNOWN.
parse_args() {
    while [ $# -gt 0 ]; do
        if [ "$Debug" = "y" ]; then echo "DEBUG: processing Arg $1"; fi
        case "$1" in
            -h)
                print_help
                exit $STATE_OK
                ;;
            -V)
                # REVISION is the version variable this script defines.
                print_revision "$PROGNAME" "$REVISION"
                exit $STATE_OK
                ;;
            -d)
                Debug=y
                echo "DEBUG mode swithed on"
                ;;
            -E)
                # Split the tuple at its first comma into the warning and
                # critical threshold; a missing value leaves both empty
                # and is rejected by check_thresholds.
                ExeW=${2%%,*}
                ExeC=${2#*,}
                if [ "$Debug" = "y" ]; then echo "DEBUG: Thresholds found Execution Time warn,crit: $ExeW,$ExeC"; fi
                check_thresholds -E "$ExeW" "$ExeC"
                shift
                ;;
            -L)
                # Same tuple handling as -E, for the latency thresholds.
                LatW=${2%%,*}
                LatC=${2#*,}
                if [ "$Debug" = "y" ]; then echo "DEBUG: Thresholds found Latency warn,crit: $LatW,$LatC"; fi
                check_thresholds -L "$LatW" "$LatC"
                shift
                ;;
            *)
                echo "Unknown argument: $1"
                print_usage
                exit $STATE_UNKNOWN
                ;;
        esac
        shift
    done
}

# ${1+"$@"} expands to nothing at all when the script has no arguments,
# even in old shells where a bare "$@" would yield one empty argument.
parse_args ${1+"$@"}

if [ "$Debug" = "y" ]; then echo "DEBUG: using thresholds for Exe: $ExeW,$ExeC and Lat: $LatW,$LatC";  fi

NS=/usr/local/nagios/bin/nagiostats

# Without the statistics tool nothing can be measured; report that as
# UNKNOWN instead of printing empty values with an OK exit code.
if [ ! -x "$NS" ]; then
    echo "UNKNOWN: $NS not found or not executable"
    exit $STATE_UNKNOWN
fi

# Relevant lines of the nagiostats output; the last of the three
# numbers on each line is the value this plugin reports:
# Active Service Latency:               0.001 / 2.189 / 0.360 %
# Active Service Execution Time:        0.041 / 40.317 / 1.009 sec

# Run nagiostats once so both values come from the same snapshot.
NSout=`"$NS"`
Lat=`printf '%s\n' "$NSout" | awk '/Active Service Latency:/ {print $8; exit}'`
Exe=`printf '%s\n' "$NSout" | awk '/Active Service Execution Time:/ {print $9; exit}'`

# Both values must look like numbers before they are compared against
# the thresholds; anything else means the output could not be parsed.
for value in "$Lat" "$Exe"; do
    case "$value" in
        ''|*[!0-9.]*)
            echo "UNKNOWN: could not read latency and execution time from $NS"
            exit $STATE_UNKNOWN
            ;;
    esac
done

# check against thresholds
# ideally latency and average execution time are <5sec

# Set ES to the worst state of the two measurements:
#   0  neither Lat nor Exe exceeds its warning threshold
#   1  at least one exceeds its warning threshold
#   2  at least one exceeds its critical threshold
# Reads Lat, LatW, LatC, Exe, ExeW and ExeC. The critical test comes
# last so that a warning on one value can never lower a critical state
# raised by the other. awk performs the comparisons because the
# measured values are fractional seconds.
evaluate_thresholds() {
    ES=$(awk -v lat="$Lat" -v lw="$LatW" -v lc="$LatC" \
             -v exe="$Exe" -v ew="$ExeW" -v ec="$ExeC" 'BEGIN {
        state = 0
        if (lat + 0 > lw + 0 || exe + 0 > ew + 0) state = 1
        if (lat + 0 > lc + 0 || exe + 0 > ec + 0) state = 2
        print state
    }')
}

evaluate_thresholds


# Graphing marker: when the "Program Running Time:" line of the
# nagiostats output shows 0 days, 0 hours and a single-digit minute
# count of 0 to 5, the execution time is reported as -1.
# A line such as the following does not trigger the marker:
# Program Running Time:                 0d 7h 40m 35s
marker=$($NS | grep 'Program Running Time:' | grep -c ' 0d 0h [0-5]m')
if [ "$marker" -eq 1 ]; then
    Exe=-1
fi

# Print the single result line: a readable summary, then " | ", then
# one performance-data entry per metric in the form
# value;warning;critical followed by two fixed zeros. The exit code is
# the state held in ES.
summary="Nagios average Service Execution time:$Exe sec, average latency:$Lat sec"
perfdata="exec_time=$Exe;$ExeW;$ExeC;0;0 latency=$Lat;$LatW;$LatC;0;0"
echo "$summary | $perfdata"
exit $ES