#!/usr/bin/perl -w
#
# NAGIOS Plugin: check_3ware_disk
# Check the status of the 3ware raid controller disks with tw_cli
# Author: Marius Hein
# NETWAYS GmbH, www.netways.de, info@netways.de
#
# Additions by Ianaré Sévi :
# SSH add-on, newer style report, more keywords
# Mauled over by John Jore, j o h n @ j o r e . n o
# Removed stuff I don't want or need, added Disk Temp and ReAllocated Sectors checks.
# Most severe error "wins" and is reported
#
# One of the problems with the 3ware controller is the disks can, and do, move
# around from one port to another. How do you support command line parameters
# when this happens? My Seagate disks run 4-6 degrees C cooler than the WD disks.
# Using a command line parameter would make sense, but not possible.
# Maybe creating some kind of database/lookup with different models and
# "acceptable" temperature ranges is the way forward?
#
# Overview of what the script does:
#   1. Parses --controller/-C and --disk/-P (defaults: controller 0, disk 8).
#   2. Runs "sudo /usr/sbin/tw_cli /cX/pY show <item>" for status, model, NCQ,
#      temperature and reallocated sectors.
#   3. Classifies each result as OK / WARNING / CRITICAL / UNKNOWN.
#   4. Prints one summary line and exits with the most severe exit code.

use strict;
use warnings;
use File::Basename;
use Getopt::Long;

my $progname = basename($0);

# tw_cli port status keywords grouped by the plugin state they map to.
# Any keyword not listed here is reported as UNKNOWN.
my @state_ok       = ("OK", "VERIFYING");
my @state_warning  = ("DEGRADED", "INITIALIZING", "MIGRATING");
my @state_critical = ("OFFLINE", "DEVICE-ERROR");

# Threshold semantics (shared by temperature and reallocated sectors):
#   0 .. ok                -> OK
#   ok < value < warning   -> WARNING
#   value >= warning       -> CRITICAL
my $temp_ok      = 51;    # Up to and including this temperature is OK
my $temp_warning = 55;    # Strictly between ok and this is WARNING; this and above is CRITICAL

# NOTE: with 39/40 no integer lies strictly between the two values, so the
# WARNING band for reallocated sectors is currently empty (OK jumps straight
# to CRITICAL). Widen the gap if a warning band is wanted.
my $ra_ok      = 39;      # Up to and including this many reallocated sectors is OK
my $ra_warning = 40;      # This many or more is CRITICAL

# Drive models for which enabled NCQ is accepted.
my @ncq_exempt_models = ("ST3750330NS");

my %conf;
$conf{'bin'}                  = "tw_cli";
$conf{'fullbin'}              = "/usr/sbin/$conf{'bin'}";
$conf{'cmd_part_status'}      = "show status";
$conf{'cmd_part_temperature'} = "show temperature";
$conf{'cmd_part_rasect'}      = "show rasect";
$conf{'cmd_part_ncq'}         = "show ncq";
$conf{'cmd_part_model'}       = "show model";

# Severity rank per Nagios exit code. CRITICAL (2) deliberately outranks
# UNKNOWN (3): a value that could not be read must never hide a real failure
# found by another check. UNKNOWN still outranks WARNING and OK.
my %severity_of = (
    0 => 0,    # OK
    1 => 1,    # WARNING
    3 => 2,    # UNKNOWN
    2 => 3,    # CRITICAL
);

# Print the option overview.
sub print_help {
    print "\n";
    print "check 3ware >>HELP<<\n";
    print "\n";
    print "\t --help, -h\t\t\t help screen.\n";
    print "\t --usage\t\t\t little usage\n";
    print "\n";
    print "\t --controller, -C\t\t Controller ID\n";
    print "\t --disk, -P\t\t\t Disk ID\n";
    return;
}

# Print a short invocation example.
sub print_usage {
    print "\n";
    print "check 3ware >>USAGE<<\n";
    print "\n";
    print "\t$progname -C 0 -P 8 (checks Controller 0 Disk 8 for status)\n";
    print "\t$progname --help (Displays the Helpmessage)\n";
    print "\n";
    return;
}

# Run "sudo tw_cli /c<control>/p<disk> <cmd_part>" and return its stdout as
# one string, or undef when the command could not be started.
# The list form of open() is used so that no shell ever parses the arguments.
sub run_tw_cli {
    my ($control, $disk, $cmd_part) = @_;

    my @command = ('sudo', $conf{'fullbin'}, "/c$control/p$disk", split(' ', $cmd_part));

    my $pipe;
    open($pipe, '-|', @command) or return undef;
    my $output = do { local $/; <$pipe> };    # slurp everything at once
    close($pipe);    # tw_cli's exit status is not evaluated (neither did the original)

    return $output;
}

# Return the text following the first "= " in tw_cli output (up to the next
# "= ", if any), with trailing whitespace removed. Returns undef when there is
# no output or no "= " separator.
sub field_value {
    my ($output) = @_;
    return undef unless defined $output;

    my (undef, $value) = split(/= /, $output);
    return undef unless defined $value;

    $value =~ s/\s+$//;
    return $value;
}

# Return the unsigned integer at the start of $text, or undef when $text is
# undefined or does not start with a digit.
sub leading_integer {
    my ($text) = @_;
    return undef unless defined $text;
    return undef unless $text =~ /\A(\d+)/;
    return $1;
}

# Text used in the summary line for a value that may be missing.
sub display_value {
    my ($value) = @_;
    return (defined($value) && length($value)) ? $value : "unknown";
}

# Map a tw_cli port status keyword to (state name, exit code).
sub classify_status {
    my ($stat) = @_;
    return ("UNKNOWN", 3) unless defined $stat;

    return ("OK",       0) if grep { $stat eq $_ } @state_ok;
    return ("WARNING",  1) if grep { $stat eq $_ } @state_warning;
    return ("CRITICAL", 2) if grep { $stat eq $_ } @state_critical;
    return ("UNKNOWN",  3);
}

# Map a numeric reading to (state name, exit code) using the threshold
# semantics described next to the threshold variables. Anything that is not a
# non-negative integer yields UNKNOWN instead of silently counting as 0.
sub classify_threshold {
    my ($value, $ok, $warning) = @_;
    return ("UNKNOWN", 3) unless defined($value) && $value =~ /\A\d+\z/;

    return ("OK",       0) if $value <= $ok;
    return ("WARNING",  1) if $value < $warning;
    return ("CRITICAL", 2);
}

# Map the NCQ "Enabled" value and the drive model to (state name, exit code).
# Exempt models are always OK; otherwise "No" is OK, any other value is
# CRITICAL and a value that could not be read is UNKNOWN.
sub classify_ncq {
    my ($ncq, $model) = @_;

    if (defined $model) {
        return ("OK", 0) if grep { $model eq $_ } @ncq_exempt_models;
    }
    return ("UNKNOWN",  3) unless defined $ncq;
    return ("OK",       0) if $ncq eq "No";
    return ("CRITICAL", 2);
}

#
# Command line handling
#
my ($opt_control, $opt_disk, $opt_help, $opt_usage);

Getopt::Long::Configure('bundling');
GetOptions(
    "controller=s" => \$opt_control,
    "C=s"          => \$opt_control,
    "disk=s"       => \$opt_disk,
    "P=s"          => \$opt_disk,
    "h"            => \$opt_help,
    "help"         => \$opt_help,
    "usage"        => \$opt_usage
) || die "try '$progname --help' for informations.\n";

# Default controller and disk (My 3ware starts on P8 with the latest firmware. Go figure)
# Test with defined(): "-P 0" and "-C 0" are valid IDs and must not be
# replaced by the defaults.
$opt_control = 0 unless defined $opt_control;
$opt_disk    = 8 unless defined $opt_disk;

if ($opt_help) {
    print_help();
    exit;
}

if ($opt_usage) {
    print_usage();
    exit;
}

# Both IDs end up in a command executed through sudo, so accept plain
# non-negative integers only and report a usage problem as UNKNOWN.
unless ($opt_control =~ /\A\d+\z/ && $opt_disk =~ /\A\d+\z/) {
    print "UNKNOWN (controller and disk IDs must be non-negative integers)\n";
    print_help();
    exit 3;
}

my $port = "/c$opt_control/p$opt_disk";

#
# Disk Status
#
my $status_text = field_value(scalar run_tw_cli($opt_control, $opt_disk, $conf{'cmd_part_status'}));
my ($state_stat, $exit_stat) = classify_status($status_text);
my $desc_stat = "Disk $port is " . display_value($status_text);

#
# Disk Model (only used to decide whether enabled NCQ is acceptable)
#
my $desc_model = field_value(scalar run_tw_cli($opt_control, $opt_disk, $conf{'cmd_part_model'}));

#
# NCQ
#
my $ncq_output = run_tw_cli($opt_control, $opt_disk, $conf{'cmd_part_ncq'});
my $ncq_value;
if (defined($ncq_output) && $ncq_output =~ /Enabled\s*=\s*(\w+)/) {
    $ncq_value = $1;
}
my ($state_ncq, $exit_ncq) = classify_ncq($ncq_value, $desc_model);
my $desc_ncq = "NCQ is " . display_value($ncq_value);

#
# Disk Temperature (leading integer of the reported value)
#
my $temp_text = field_value(scalar run_tw_cli($opt_control, $opt_disk, $conf{'cmd_part_temperature'}));
my $temp_value = leading_integer($temp_text);
my ($state_temp, $exit_temp) = classify_threshold($temp_value, $temp_ok, $temp_warning);
my $desc_temp = "Temp: is " . display_value($temp_value);

#
# Reallocated Sectors
#
my $ra_text = field_value(scalar run_tw_cli($opt_control, $opt_disk, $conf{'cmd_part_rasect'}));
my $ra_value = leading_integer($ra_text);
my ($state_ra, $exit_ra) = classify_threshold($ra_value, $ra_ok, $ra_warning);
my $desc_ra = "RA Sect: is " . display_value($ra_value);

#
# Most severe result wins. On equal severity the later check in this list is
# the one named in the output (Status, NCQ, Temp, RA).
#
my @checks = (
    [ "Status", $state_stat, $exit_stat ],
    [ "NCQ",    $state_ncq,  $exit_ncq ],
    [ "Temp",   $state_temp, $exit_temp ],
    [ "RA",     $state_ra,   $exit_ra ],
);

my $state_out = "OK";
my $exit_out  = 0;
foreach my $check (@checks) {
    my ($label, $state, $code) = @{$check};
    next if $code == 0;
    if ($severity_of{$code} >= $severity_of{$exit_out}) {
        $state_out = "$state ($label)";
        $exit_out  = $code;
    }
}

# We have a winner:
print "$state_out ($desc_stat, $desc_ncq, $desc_temp, $desc_ra)\n";
exit $exit_out;