#!/usr/bin/python
#
# Anchor System - http://www.anchor.com.au
#
# Oliver Hookins 
# Paul De Audney 
# Barney Desmond
#
# Edit by Belgotux - http://www.monlinux.net
#
# check-aacraid.py
#
# Grabs the output from "/usr/sbin/arcconf GETCONFIG 1 LD" then
# determines the health of the Logical Devices.
#
# Grabs the output from "/usr/sbin/arcconf GETCONFIG 1 AL" then
# determines the health of various status indicators from the card
# and drives.
#
# Grabs the output from "/usr/sbin/arcconf GETCONFIG 1 PD" then
# determines the health of drives.
#
# After the checks are run, it deletes the file "UcliEvt.log" from
# the current working directory.
# 
# Add this to your "/etc/sudoers" file:
# "nagios ALL=(root) NOPASSWD: /usr/sbin/arcconf GETCONFIG *"
#
# v0.1 - only checks card information so far, not drives yet
# v0.2 - checks logical volume status & wipes log
# v0.3 - strips trailing "," & tells you the logical volume with
#        the failure
# v1.0 - check arcconf GETCONFIG 1 PD too and correct ZMM status
#        battery, include nagios status state OK WARN and CRITICAL
#        change state return code, nagios always OK=0 WARN=1 CRIT=2
#        by Belgotux http://www.monlinux.net


import sys, os, re, string, subprocess
from subprocess import PIPE, Popen

c_status_re = re.compile('^\s*Controller Status\s*:\s*(.*)$')
l_status_re = re.compile('^\s*Status of logical device\s*:\s*(.*)$')
l_device_re = re.compile('^Logical device number ([0-9]+).*$')
c_defunct_re = re.compile('^\s*Defunct disk drive count\s:\s*([0-9]+).*$')
c_degraded_re = re.compile('^\s*Logical devices/Failed/Degraded\s*:\s*([0-9]+)/([0-9]+)/([0-9]+).*$')
b_status_re = re.compile('^\s*Status\s*:\s*(.*)$')
b_temp_re = re.compile('^\s*Over temperature\s*:\s*(.*)$')
b_capacity_re = re.compile('\s*Capacity remaining\s*:\s*([0-9]+)\s*percent.*$')
b_time_re = re.compile('\s*Time remaining \(at current draw\)\s*:\s*([0-9]+) days, ([0-9]+) hours, ([0-9]+) minutes.*$')
d_device_re = re.compile('^\s*Device\s*\s*#([0-9]+)\s*$')
d_state_re = re.compile('^\s*State\s*:\s*(.*)$')

cstatus = lstatus = ldevice = cdefunct = cdegraded = bstatus = btemp = bcapacity = btime = ddecide = dstate = ""
lnum = ""
check_status = 0
check_warn = 0
check_crit = 0
result = ""

for line in Popen(["/usr/bin/sudo","/usr/sbin/arcconf" , "GETCONFIG","1","LD"], stdin=PIPE, stdout=PIPE, close_fds=True).stdout:
	# Match the regexs
	ldevice = l_device_re.match(line)
	if ldevice:
		lnum = ldevice.group(1)
		continue

	lstatus = l_status_re.match(line)
	if lstatus:
		if lstatus.group(1) != "Optimal":
			check_status = 2
			check_crit = 1
		result += "Logical Device " + lnum + " " + lstatus.group(1) + ","

for line in Popen(["/usr/bin/sudo","/usr/sbin/arcconf" , "GETCONFIG","1","AD"], stdin=PIPE, stdout=PIPE, close_fds=True).stdout:
	# Match the regexs
	cstatus = c_status_re.match(line)
	if cstatus:
		if cstatus.group(1) != "Optimal":
			check_status = 2
			check_crit = 1
		result += "Controller " + cstatus.group(1) + ","
		continue

	cdefunct = c_defunct_re.match(line)
	if cdefunct:
		if int(cdefunct.group(1)) > 0:
			check_status = 2
			check_crit = 1
			result += "Defunct drives " + cdefunct_group(1) + ","
		continue

	cdegraded = c_degraded_re.match(line)
	if cdegraded:
		if int(cdegraded.group(2)) > 0:
			check_status = 2
			check_crit = 1
			result += "Failed drives " + cdegraded.group(2) + ","
		if int(cdegraded.group(3)) > 0:
			check_status = 2
			check_crit = 1
			result += "Degraded drives " + cdegraded.group(3) + ","
		continue

	bstatus = b_status_re.match(line)
	if bstatus:
		if bstatus.group(1) == "Not Installed":
			continue
		
		if bstatus.group(1) == "Charging":
			if check_status < 2:
				check_status = 1
				check_crit = 1
		elif bstatus.group(1) != ("Optimal" and "ZMM Optimal"):
			check_status = 2
			check_crit = 1
		result += "Battery Status " + bstatus.group(1) + ","
		continue
	
	btemp = b_temp_re.match(line)
	if btemp:
		if btemp.group(1) != "No":
			check_status = 2
			check_crit = 1
			result += "Battery Overtemp " + btemp.group(1) + ","
		continue

	bcapacity = b_capacity_re.match(line)
	if bcapacity:
		result += "Battery Capacity " + bcapacity.group(1) + "%,"
		if bcapacity.group(1) < 50:
			if check_status < 2:
				check_status = 1
				check_crit = 1
		if bcapacity.group(1) < 25:
			check_status = 2
			check_crit = 1
		continue

	btime = b_time_re.match(line)
	if btime:
		timemins = int(btime.group(1)) * 1440 + int(btime.group(2)) * 60 + int(btime.group(3))
		if timemins < 1440:
			if check_status < 2:
				check_status = 1
				check_crit = 1
		if timemins < 720:
			check_status = 28
			check_crit = 1
		result += "Battery Time "
		if timemins < 60:
			result += str(timemins) + "mins,"
		else:
			result += str(timemins/60) + "hours,"

for line in Popen(["/usr/bin/sudo","/usr/sbin/arcconf" , "GETCONFIG","1","PD"], stdin=PIPE, stdout=PIPE, close_fds=True).stdout:
	# Match the regexs
	ddevice = d_device_re.match(line)
	if ddevice:
		dnum = ddevice.group(1)
		continue

	dstate = d_state_re.match(line)
	if dstate:
		if dstate.group(1) == "Ready":
			check_warn = 1
			check_status = 1
		elif dstate.group(1) != "Online":
			check_crit = 1
			check_status = 2
		result += "Device " + dnum + " " + dstate.group(1) + ","

if result == "":
	result = "No output from arcconf!"
	check_status = 3

# strip the trailing "," from the result string.
result = result.rstrip(",")
nagios_state = "AACRAID "
if check_status == 0:
	nagios_state += "OK: " + result
#show critial first if they are warning and critical errors
elif check_crit == 1:
	nagios_state += "CRITICAL: " + result
elif check_warn == 1:
	nagios_state += "WARNING: " + result
else:
	nagios_state += "CRITICAL: " + result
print nagios_state

try:
	cwd = os.getcwd()
	fullpath = os.path.join(cwd,'UcliEvt.log')
	os.unlink(fullpath)
except:
	pass

sys.exit(check_status)
