#!/usr/bin/perl -w
#
# check_3com_health -  Check 3Com switch health
# nagios: -epn
#
# Copyright (C) 2009 Jason Abraham <jtabraha@gmail.com>
#
# WCIT
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

use strict;
use warnings;
use Nagios::Plugin;
use File::Basename;
use Net::SNMP;

### Constants
# Bit flags OR-ed into $status as individual tests report problems.
use constant {
  WCIT_WARNFLAG => 1,
  WCIT_CRITFLAG => 2,
};

# Bit flags selecting which tests run (combined into $onlyTest).
use constant {
  WCIT_TESTFAN => 1,
  WCIT_TESTPWR => 2,
  WCIT_TESTMEM => 4,
  WCIT_TESTCPU => 8,
};

# Defaults used when no matching command line option is given.
# Fan/power thresholds alert when the percentage of active items is LESS than
# the value; memory/CPU thresholds alert when usage is GREATER than the value.
use constant {
  WCIT_DefTCPTimeout => 5,
  WCIT_DefWarnFAN    => 100,
  WCIT_DefCritFAN    => 76,
  WCIT_DefWarnPWR    => 100,
  WCIT_DefCritPWR    => 76,
  WCIT_DefWarnMEM    => 85,
  WCIT_DefCritMEM    => 90,
  WCIT_DefWarnCPU    => 85,
  WCIT_DefCritCPU    => 90,
};

# Huawei-3Com OIDs (table roots walked with get_table)
use constant {
  WCIT_OID_h3cFtmUnitID         => ".1.3.6.1.4.1.43.45.1.10.2.1.1.1.1.1.2",
  WCIT_OID_hwDevMFanStatus      => ".1.3.6.1.4.1.43.45.1.2.23.1.9.1.1.1.2",
  WCIT_OID_hwDevMPowerStatus    => ".1.3.6.1.4.1.43.45.1.2.23.1.9.1.2.1.2",
  WCIT_OID_hwMemSize            => ".1.3.6.1.4.1.43.45.1.6.1.2.1.1.2",
  WCIT_OID_hwMemFree            => ".1.3.6.1.4.1.43.45.1.6.1.2.1.1.3",
  WCIT_OID_hwCpuCostRatePer1Min => ".1.3.6.1.4.1.43.45.1.6.1.1.1.3",
};

# SNMP index arithmetic
use constant {
  WCIT_h3cUNumStart => 2,      # first index probed in the unit-ID table
  WCIT_h3cUNumJump  => 165,    # distance between consecutive unit-ID indexes
  WCIT_h3cStart     => 65536,
  WCIT_h3cJump      => 65536,  # system unit N owns indexes N*jump .. (N+1)*jump-1
  WCIT_h3cDeactive  => 2,      # fan/power status value counted as "not active"
};

# Package globals shared between the main flow and the subs at the end of the
# file. Declared with "our" (the replacement for the obsolete "use vars").
our ($sess, $np, $status, $msg, $numUnits, %unitNumList, %revUnitNumList);

#Other Vars
# Effective warn/crit thresholds for each test, filled in after option parsing.
my ($WarnFAN, $CritFAN, $WarnPWR, $CritPWR, $WarnMEM, $CritMEM, $WarnCPU, $CritCPU);
# Scratch variables used by the main flow.
my ($err, %resH, $key, $val, $onlyTest);

#DEFINE
my $VERSION  = "0.7";
my $PROGNAME = basename($0);
$status = 0;  # bitmask of WCIT_WARNFLAG / WCIT_CRITFLAG, nothing raised yet

# Build the plugin object. The usage/blurb/extra strings are shown by
# --help/--usage; %s in the usage string is filled in by Nagios::Plugin.
$np = Nagios::Plugin->new(
  usage => "Usage: %s -H <host> [ -u|--unit <unit> ] [ -C <community> ]
  [ --only <f,p,m,c>] [ -w <single warn> ] [ -c <single crit> ] 
  [ --fw <fan warn> ] [ --fc <fan crit> ] [ --pw <pwr warn> ] [ --pc <pwr crit> ]
  [ --mw <mem warn> ] [ --mc <mem crit> ] [ --cw <cpu warn> ] [ --cc <cpu crit> ]
  [ -T|--tcptimeout <secs> ]",
  version => $VERSION,
  blurb => "This plugin checks various health aspects of 3Com switch following 
  the Huawei-3Com(H3C) a3com.jv-mib.huawei SNMP standard.",
  extra => "Note: Although the various warn/crit values are percentages do not include the % sign
"
);

# Define and document the valid command line options
# usage, help, version, timeout and verbose are defined by default.
# Every help text starts with the option names themselves, so it is shown
# verbatim in --help output.
$np->add_arg(
  spec => 'host|H=s',
  help =>
qq{
 -H, --host=ADDRESS
   Host name or IP Address},
  required => 1
);

$np->add_arg(
  spec => 'unit|u=i',
  help =>
qq{-u, --unit=<Unit number>
   Limit tests to single unit.  Otherwise all units are tested. 
   (default: all units tested, performance data averaged) },
  required => 0
);

$np->add_arg(
  spec => 'community|C=s',
  help =>
qq{-C, --community=STRING
   SNMP read community string (default: public)
   },
  required => 0,
  default => 'public'
);

# The spec also accepts the short form -o, so the help lists it.
$np->add_arg(
  spec => 'only|o=s',
  help =>
qq{-o, --only=<f,p,m,c>
   Explicitly choose the tests to perform.  
   f=fan p=power m=memory c=cpu (default: test all) },
  required => 0
);

$np->add_arg(
  spec => 'warn|w=f',
  help =>
qq{-w, --warn=FLOAT
   Specify warning percent for single tests.  Can be overridden by specific 
   test values. Note: it is not recommended to use this option if doing 
   multiple tests. (default: use individual test defaults) },
  required => 0
);

$np->add_arg(
  spec => 'crit|c=f',
  help =>
qq{-c, --crit=FLOAT
   Specify critical percent for single tests.  Can be overridden by 
   specific test values.  Note: it is not recommended to use this option if 
   doing multiple tests. (default: use individual test defaults)
   },
  required => 0
);

$np->add_arg(
  spec => 'fwarn|fw=f',
  help =>
qq{--fw, --fwarn=FLOAT
   Specify warning percent for fan test. This percent refers to the percent 
   of active fans. 3 of 3 fans active = 100%, 2 of 3 fans active = 66.6%, 
   5 of 7 fans active = 71%.  If the percentage of active fans drops below 
   the specified percentage a warning will be thrown. (default: }.WCIT_DefWarnFAN.")",
  required => 0
);

$np->add_arg(
  spec => 'fcrit|fc=f',
  help =>
qq{--fc, --fcrit=FLOAT
   Specify critical percent for fan test. (default: }.WCIT_DefCritFAN.")\n",
  required => 0
);

$np->add_arg(
  spec => 'pwarn|pw=f',
  help =>
qq{--pw, --pwarn=FLOAT
   Specify warning percent for power supply test. This percent refers to 
   the percent of active power supplies. See fan option for more information.  
   (default: }.WCIT_DefWarnPWR.")",
  required => 0
);

# Shows the power-supply default (previously the fan constant was printed).
$np->add_arg(
  spec => 'pcrit|pc=f',
  help =>
qq{--pc, --pcrit=FLOAT
   Specify critical percent for power supply test. (default: }.WCIT_DefCritPWR.")\n",
  required => 0
);

# The memory test compares the percentage of memory IN USE against the
# thresholds, so the help describes usage rather than free memory.
$np->add_arg(
  spec => 'mwarn|mw=f',
  help =>
qq{--mw, --mwarn=FLOAT
   Specify warning percent for memory usage test. If used memory rises above 
   this percent a warning will be thrown.  (default: }.WCIT_DefWarnMEM.")",
  required => 0
);

$np->add_arg(
  spec => 'mcrit|mc=f',
  help =>
qq{--mc, --mcrit=FLOAT
   Specify critical percent for memory usage test. (default: }.WCIT_DefCritMEM.")\n",
  required => 0
);

$np->add_arg(
  spec => 'cwarn|cw=f',
  help =>
qq{--cw, --cwarn=FLOAT
   Specify warning percent for cpu usage test. If cpu usage is above 
   this percent a warning will be thrown.  (default: }.WCIT_DefWarnCPU.")",
  required => 0
);

$np->add_arg(
  spec => 'ccrit|cc=f',
  help =>
qq{--cc, --ccrit=FLOAT
   Specify critical percent for cpu usage test. (default: }.WCIT_DefCritCPU.")\n",
  required => 0
);

$np->add_arg(
  spec => 'tcptimeout|T=i',
  help =>
qq{-T, --tcptimeout=INTEGER
   Timeout value, in seconds, for SNMP responses (default: }.WCIT_DefTCPTimeout.")".qq{
   Not to be confused with general plugin timeout},
  required => 0,
  default => WCIT_DefTCPTimeout
);

# Parse arguments and process standard ones (e.g. usage, help, version)

$np->getopts;


###Input Processing
$np->nagios_die("TCP Timeout must be greater than 0") if( $np->opts->tcptimeout < 1 );

#Load Warn and Crit percentages
# Precedence for every test: its own option (--fw, --pc, ...), then the
# global -w/-c value, then the built-in default.
if( defined $np->opts->warn ) {
  $np->nagios_die("Global Warn must be between 0-100") if( $np->opts->warn < 0 || $np->opts->warn > 100 );
  $WarnFAN = $WarnPWR = $WarnMEM = $WarnCPU = $np->opts->warn;
}
else {
  $WarnFAN = WCIT_DefWarnFAN;
  $WarnPWR = WCIT_DefWarnPWR;
  $WarnMEM = WCIT_DefWarnMEM;
  $WarnCPU = WCIT_DefWarnCPU;
}
$WarnFAN = $np->opts->fwarn if (defined $np->opts->fwarn);
$WarnPWR = $np->opts->pwarn if (defined $np->opts->pwarn);
$WarnMEM = $np->opts->mwarn if (defined $np->opts->mwarn);
$WarnCPU = $np->opts->cwarn if (defined $np->opts->cwarn);

if( defined $np->opts->crit ) {
  $np->nagios_die("Global Crit must be between 0-100") if( $np->opts->crit < 0 || $np->opts->crit > 100 );
  $CritFAN = $CritPWR = $CritMEM = $CritCPU = $np->opts->crit;
}
else {
  $CritFAN = WCIT_DefCritFAN;
  $CritPWR = WCIT_DefCritPWR;
  $CritMEM = WCIT_DefCritMEM;
  $CritCPU = WCIT_DefCritCPU;
}
$CritFAN = $np->opts->fcrit if (defined $np->opts->fcrit);
$CritPWR = $np->opts->pcrit if (defined $np->opts->pcrit);
$CritMEM = $np->opts->mcrit if (defined $np->opts->mcrit);
$CritCPU = $np->opts->ccrit if (defined $np->opts->ccrit);

# The per-test options are percentages too, so hold them to the same 0-100
# range that is enforced for the global -w/-c values.
foreach my $threshold (
  [ 'Fan Warn',   $WarnFAN ], [ 'Fan Crit',   $CritFAN ],
  [ 'Power Warn', $WarnPWR ], [ 'Power Crit', $CritPWR ],
  [ 'Mem Warn',   $WarnMEM ], [ 'Mem Crit',   $CritMEM ],
  [ 'CPU Warn',   $WarnCPU ], [ 'CPU Crit',   $CritCPU ],
) {
  my ($label, $percent) = @{$threshold};
  $np->nagios_die("$label must be between 0-100") if( $percent < 0 || $percent > 100 );
}

#Which Checks
# $onlyTest is a bitmask of WCIT_TEST* flags; without --only every test runs.
$onlyTest = 0;
if(defined $np->opts->only) {
  my %testFlagFor = (
    f => WCIT_TESTFAN,
    p => WCIT_TESTPWR,
    m => WCIT_TESTMEM,
    c => WCIT_TESTCPU,
  );
  my @theOpts = split(/,/,$np->opts->only);
  $np->nagios_die("Too many options specified for --only option") if( scalar @theOpts > 4 );
  # An empty value (e.g. --only '') would otherwise select no test at all and
  # the plugin would report OK without having checked anything.
  $np->nagios_die("No tests specified for --only option") unless( @theOpts );
  foreach my $opt ( @theOpts ) {
    $np->nagios_die("Invalid --only option: ".$opt) unless( exists $testFlagFor{$opt} );
    $onlyTest |= $testFlagFor{$opt};
  }
} else {
  $onlyTest = WCIT_TESTFAN | WCIT_TESTPWR | WCIT_TESTMEM | WCIT_TESTCPU;
}



##############################################################################
# Start actual work

#Create Session
# SNMPv1 session to the target host. Timeout is the per-request SNMP timeout
# given with -T; the plugin-wide timeout is the alarm started below.
($sess,$err) = Net::SNMP->session(Hostname => $np->opts->host,
                                      Community => $np->opts->community,
                                      Version => 1,
                                      Timeout => $np->opts->tcptimeout);
$np->nagios_exit(CRITICAL,"$err") unless($sess);

# Start Plugin Timeout
alarm $np->opts->timeout;

###Compile UNIT information
# get_snmphash returns an empty list on SNMP failure, so an empty hash here
# always means the unit table could not be read.
%resH = get_snmphash(WCIT_OID_h3cFtmUnitID,'h3cFtmUnitID');
myexit(CRITICAL,"SNMP ERROR") unless (%resH);

#create unitNumList HASH
#--SystemUnitNum -> UserUnitNum
# System unit numbers count up from 1; their entries sit in the unit-ID table
# at WCIT_h3cUNumStart, WCIT_h3cUNumStart + WCIT_h3cUNumJump, ...
{
  my $index = 1;
  my $jmpIndex = WCIT_h3cUNumStart;
  while( defined $resH{$jmpIndex} )
  {
    $unitNumList{$index} = $resH{$jmpIndex};
    $index++;
    $jmpIndex += WCIT_h3cUNumJump;
  }
}
# Without at least one mapped unit the per-unit averages below would divide
# by zero, so stop here with a clear message.
myexit(CRITICAL,"No units found") unless (%unitNumList);

#create revUnitNumList (UserUnitNum -> SystemUnitNum)
%revUnitNumList = reverse %unitNumList;

# With -u/--unit, restrict both maps to the requested unit. The lookup runs
# over the mapped units only, so a unit that cannot be translated to a system
# unit number is reported as not found instead of yielding an undefined key.
if ( defined $np->opts->unit )
{
  my $userUnit = $np->opts->unit;
  my ($sysUnit) = grep { $unitNumList{$_} == $userUnit }
                  sort { $a <=> $b } keys %unitNumList;
  myexit(CRITICAL,"Unit #".$userUnit." not found") unless ( defined $sysUnit );
  %unitNumList = ( $sysUnit => $userUnit );
  %revUnitNumList = ( $userUnit => $sysUnit );
}
$numUnits = scalar keys %unitNumList;

$msg = undef;



###Test Fan & Pwr
# Both tests are run by do_itemStatus; each row holds the selecting test flag
# followed by the arguments handed to it (label, warn, crit, OID, OID name).
foreach my $itemTest (
  [ WCIT_TESTFAN, 'fan', $WarnFAN, $CritFAN, WCIT_OID_hwDevMFanStatus,   'hwDevMFanStatus'   ],
  [ WCIT_TESTPWR, 'pwr', $WarnPWR, $CritPWR, WCIT_OID_hwDevMPowerStatus, 'hwDevMPowerStatus' ],
) {
  my ($testFlag, @itemArgs) = @{$itemTest};
  next unless( $onlyTest & $testFlag );
  do_itemStatus(@itemArgs);
}

###Test Mem
do_memStatus($WarnMEM, $CritMEM) if( $onlyTest & WCIT_TESTMEM );

# do_memStatus(warn, crit)
#   Checks the percentage of memory in use on every unit in %unitNumList.
#   warn / crit - used-memory percentages above which WCIT_WARNFLAG /
#                 WCIT_CRITFLAG are raised in $status.
#   Appends a "MEM:" section to $msg and adds a 'mem' perfdata entry holding
#   the average over all tested units. If an SNMP table cannot be read it
#   raises WCIT_CRITFLAG, appends 'SNMP ERROR' and adds no perfdata.
#   This lives in a sub because the error paths bail out with "return",
#   which is fatal ("Can't return outside a subroutine") in mainline code.
sub do_memStatus {
  my ($warn, $crit) = @_;
  my ($overallPercent, $okay) = (0, 1);

  if( defined $msg ) { $msg .= '  MEM:'; }
  else { $msg = 'MEM:'; }

  #load mem size and free
  my %sizeHash = get_snmphash(WCIT_OID_hwMemSize,'hwMemSize');
  my %freeHash;
  %freeHash = get_snmphash(WCIT_OID_hwMemFree,'hwMemFree') if(%sizeHash);
  unless(%sizeHash && %freeHash) {
    $status |= WCIT_CRITFLAG;
    $msg .= 'SNMP ERROR';
    return;
  }

  # Sorted so the units always appear in the same order in the message.
  foreach my $sysUnit (sort { $a <=> $b } keys %unitNumList) {
    my $sysUnitTran = $sysUnit * WCIT_h3cJump;
    my $size = $sizeHash{$sysUnitTran};
    my $free = $freeHash{$sysUnitTran};
    # A unit with missing or zero-size data counts as 100% used, so it is
    # reported rather than silently skipped.
    my $singlePercent = 100;
    if( defined $size && defined $free && $size > 0 ) {
      $singlePercent = ($size - $free) / $size * 100;
    }
    if($singlePercent > $warn) {
      $okay = 0;
      # %unitNumList maps system unit -> user unit, which is the number the
      # operator knows the unit by.
      $msg = sprintf("%s U%d=%.0f%%",$msg,$unitNumList{$sysUnit},$singlePercent);
      $status = $status | WCIT_WARNFLAG;
      $status = $status | WCIT_CRITFLAG if($singlePercent > $crit);
    }
    $overallPercent += $singlePercent;
  }
  $overallPercent /= $numUnits if( $numUnits );
  $msg .= "OK" if($okay);
  $np->add_perfdata(
    label     => 'mem',
    value     => sprintf('%.0f%%',$overallPercent),
    warning => $warn,
    critical => $crit,
    min       => 0,
    max       => 100
  );
  return;
}

###Test CPU
do_cpuStatus($WarnCPU, $CritCPU) if( $onlyTest & WCIT_TESTCPU );

# do_cpuStatus(warn, crit)
#   Checks the hwCpuCostRatePer1Min value of every unit in %unitNumList.
#   warn / crit - CPU percentages above which WCIT_WARNFLAG / WCIT_CRITFLAG
#                 are raised in $status.
#   Appends a "CPU:" section to $msg and adds a 'cpu' perfdata entry holding
#   the average over all tested units. If the SNMP table cannot be read it
#   raises WCIT_CRITFLAG, appends 'SNMP ERROR' and adds no perfdata.
#   This lives in a sub because the error path bails out with "return",
#   which is fatal ("Can't return outside a subroutine") in mainline code.
sub do_cpuStatus {
  my ($warn, $crit) = @_;
  my ($overallPercent, $okay) = (0, 1);

  if( defined $msg ) { $msg .= '  CPU:'; }
  else { $msg = 'CPU:'; }

  my %snmpHash = get_snmphash(WCIT_OID_hwCpuCostRatePer1Min,'hwCpuCostRatePer1Min');
  unless(%snmpHash) {
    $status |= WCIT_CRITFLAG;
    $msg .= 'SNMP ERROR';
    return;
  }

  # Sorted so the units always appear in the same order in the message.
  foreach my $sysUnit (sort { $a <=> $b } keys %unitNumList) {
    my $sysUnitTran = $sysUnit * WCIT_h3cJump;
    my $singlePercent = $snmpHash{$sysUnitTran};
    # A unit without an entry is counted as 0% (the value the arithmetic
    # used before, minus the "uninitialized" warnings); say so when verbose.
    unless( defined $singlePercent ) {
      print ">>No CPU data for system unit $sysUnit\n" if($np->opts->verbose);
      $singlePercent = 0;
    }
    if($singlePercent > $warn) {
      $okay = 0;
      # %unitNumList maps system unit -> user unit, which is the number the
      # operator knows the unit by.
      $msg = sprintf("%s U%d=%.0f%%",$msg,$unitNumList{$sysUnit},$singlePercent);
      $status = $status | WCIT_WARNFLAG;
      $status = $status | WCIT_CRITFLAG if($singlePercent > $crit);
    }
    $overallPercent += $singlePercent;
  }
  $overallPercent /= $numUnits if( $numUnits );
  $msg .= "OK" if($okay);
  $np->add_perfdata(
    label     => 'cpu',
    value     => sprintf('%.0f%%',$overallPercent),
    warning => $warn,
    critical => $crit,
    min       => 0,
    max       => 100
  );
  return;
}

alarm(0);

# Exit with the most severe result recorded in $status: a critical flag
# outranks a warning flag, and no flag at all means OK.
my $exitCode = OK;
if( $status & WCIT_CRITFLAG ) {
  $exitCode = CRITICAL;
}
elsif( $status & WCIT_WARNFLAG ) {
  $exitCode = WARNING;
}
$np->nagios_exit($exitCode,$msg);

#################################
# Start SUBS
# do_itemStatus(item, warn, crit, oid, oidtext)
#   Checks what percentage of a unit's items (fans, power supplies) is active.
#   item    - short label; upper-cased for the message, used as perfdata label
#   warn    - raise WCIT_WARNFLAG when a unit's active percentage is below this
#   crit    - additionally raise WCIT_CRITFLAG when it is below this
#   oid     - status table to walk
#   oidtext - readable OID name, used only in verbose error output
#   Appends an "<ITEM>:" section to $msg and adds a perfdata entry holding the
#   average over all tested units. If the SNMP table cannot be read it raises
#   WCIT_CRITFLAG, appends 'SNMP ERROR' and adds no perfdata.
sub do_itemStatus {
  my ($item, $warn, $crit, $oid, $oidtext) = @_;
  my ($overallPercent, $okay) = (0, 1);

  if( defined $msg ) { $msg .= '  '.uc($item).':'; }
  else { $msg = uc($item).':'; }

  my %snmpHash = get_snmphash($oid,$oidtext);
  unless(%snmpHash) {
    $status |= WCIT_CRITFLAG;
    $msg .= 'SNMP ERROR';
    return;
  }

  # Count items per system unit in one pass. System unit N owns the indexes
  # N*WCIT_h3cJump .. (N+1)*WCIT_h3cJump-1, i.e. int(index / WCIT_h3cJump).
  my (%total, %good);
  foreach my $idx (keys %snmpHash) {
    my $owner = int($idx / WCIT_h3cJump);
    $total{$owner}++;
    $good{$owner}++ if($snmpHash{$idx} != WCIT_h3cDeactive);
  }

  # Sorted so the units always appear in the same order in the message.
  foreach my $sysUnit (sort { $a <=> $b } keys %unitNumList) {
    my $owner = int($sysUnit);
    # A unit with no entries at all stays at 0% and is therefore reported.
    my $singlePercent = 0;
    if( $total{$owner} ) {
      $singlePercent = ($good{$owner} || 0) / $total{$owner} * 100;
    }
    if($singlePercent < $warn) {
      $okay = 0;
      # %unitNumList maps system unit -> user unit, which is the number the
      # operator knows the unit by.
      $msg = sprintf("%s U%d=%.0f%%",$msg,$unitNumList{$sysUnit},$singlePercent);
      $status = $status | WCIT_WARNFLAG;
      $status = $status | WCIT_CRITFLAG if($singlePercent < $crit);
    }
    $overallPercent += $singlePercent;
  }
  $overallPercent /= $numUnits if( $numUnits );
  $msg .= "OK" if($okay);
  $np->add_perfdata(
    label     => $item,
    value     => sprintf('%.0f%%',$overallPercent),
    warning => $warn,
    critical => $crit,
    min       => 0,
    max       => 100
  );
  return;
}

#################################
# Start Support SUBS

# get_snmphash(oid, oidtext)
#   Walks the table rooted at oid using the global session $sess and returns
#   it as a hash keyed by the part of each returned OID that follows "oid.".
#   oidtext is a readable name used only in the verbose error line.
#   Returns an empty list when the walk fails.
sub get_snmphash {
  my ($oid, $oidtext) = @_;

  my $table = $sess->get_table($oid);
  if( !defined $table ) {
    print ">>SNMP Error:".$sess->error."  OID:$oidtext($oid)\n" if($np->opts->verbose);
    return ();
  }

  # Skip the base OID plus the dot that separates it from the index.
  my $prefixLen = length($oid) + 1;
  my %indexed = map { ( substr("$_", $prefixLen), $table->{$_} ) } keys %{$table};
  return %indexed;
}

# myexit(code, text)
#   Cancels the pending plugin timeout alarm, then hands the status code and
#   message text to the plugin object's nagios_exit.
sub myexit {
  my ($exitCode, $exitText) = @_;
  alarm 0;
  $np->nagios_exit($exitCode, $exitText);
}

