#!/usr/bin/env python
# Copyright 2019, Lenovo
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# About this script
#
# Nagios-style check plugin: it runs ipmitool (lanplus interface) against the
# host given with -H and reports enclosure power, PSU, PSU fan, cooling fan,
# node or voltage status, depending on --mode.
#
# No real need to change anything below here

import sys
from sys import exit
from sys import argv
from sys import exc_info
from os import getenv,putenv,environ
import subprocess
import warnings
from binascii import b2a_hex, a2b_hex
from optparse import OptionParser

warnings.filterwarnings("ignore")

version="1.0"

# Nagios exit codes
ok=0
warning=1
critical=2
unknown=3
not_present = -1

# Worst status seen so far; -1 means "no check has reported yet"
exit_status = -1

state = {}
state[not_present] = "Not Present"
state[ok] = "OK"
state[warning] = "Warning"
state[critical] = "Critical"
state[unknown] = "Unknown"

longserviceoutput="\n"
perfdata=""
summary=""
sys_health_msg=""
sudo=False


def printf(message):
    """Write one line of plugin output to stdout and flush it.

    This replaces the former ``distutils.log.Log(2).info`` alias: distutils
    is no longer part of the standard library in recent Python releases, and
    a check plugin must always produce its status line on stdout.
    """
    sys.stdout.write("%s\n" % message)
    sys.stdout.flush()


def to_bytes(value):
    """Return value as a byte string (text is encoded as UTF-8)."""
    if isinstance(value, bytes):
        return value
    return value.encode("utf-8")


def to_text(value):
    """Return value as a native ``str`` (bytes are decoded as UTF-8)."""
    if isinstance(value, str):
        return value
    return value.decode("utf-8", "replace")


def parse_hex_bytes(raw):
    """Convert ipmitool raw output such as " 10 27 0a\\n" to a list of ints.

    Splitting on any whitespace copes with doubled spaces and with replies
    that ipmitool wraps over several lines.
    """
    return [int(token, 16) for token in raw.split()]


def hex_to_text(raw):
    """Decode ipmitool raw output holding character codes into a string."""
    return to_text(a2b_hex("".join(raw.split())))


def le16(data, offset):
    """Return the 16-bit little-endian value stored at data[offset:offset+2]."""
    return data[offset] + data[offset + 1] * 256


def group_ipv4(octets):
    """Join every complete group of four octets into a dotted address.

    A trailing incomplete group is ignored.
    """
    return [".".join(str(octet) for octet in octets[start:start + 4])
            for start in range(0, len(octets) - 3, 4)]


def get_encryption_key(fstab_path='/etc/fstab'):
    """Return the key material used to decrypt the IPMI password.

    The file is scanned for lines containing "boot"; within such a line the
    space-separated field containing "UUID" is kept (the last match wins).
    An empty string is returned when nothing matches or the file cannot be
    read.

    The exact splitting rules are kept on purpose: changing them would
    change the derived key and invalidate passwords that are already
    encrypted.
    """
    uuid = ''
    try:
        with open(fstab_path) as fstab:
            content = fstab.read()
    except (IOError, OSError):
        return uuid
    for line in content.split("\n"):
        if "boot" in line:
            for field in line.split(" "):
                if "UUID" in field:
                    uuid = field
    return uuid


# AES key (first 32 characters of get_encryption_key()); filled in lazily by
# decrypt() so that importing this module does not touch the file system.
key = None
associated_data = "authenticated but not encrypted payload"

# Parse some Arguments
parser = OptionParser()
parser.add_option("-m","--mode", dest="mode",
    help="Which check mode is in use (power,cooling,nodes,psu,psu_fan,voltage)",
    default="power")
parser.add_option("-H","--host", dest="host",
    help="Hostname or IP address of the host to check")
parser.add_option("-u","--ipmi_username", dest="ipmi_username",
    help="IPMI username ", default=None)
parser.add_option("-p","--ipmi_password", dest="ipmi_password",
    help="IPMI authentication password ", default=None)
parser.add_option("--ptag", dest="ptag",
    help="Tag for decrypto IPMI authentication password ", default=None)
parser.add_option("-d","--debug", dest="debug",
    help="Enable debugging (for troubleshooting)",
    action="store_true", default=False)

# Filled in by parse_arguments()
opts = None
args = None

# ipmitool connection options: as a string (kept for compatibility) and as an
# argument list (what is actually executed, so values may contain spaces)
ipmi_options = ""
ipmi_args = []


def parse_arguments(arguments=None):
    """Parse the command line into the module-level ``opts`` / ``args``.

    arguments -- list of arguments to parse; None means sys.argv[1:].
    Exits through parser.error() when the host or the mode is missing.
    """
    global opts, args
    (opts, args) = parser.parse_args(arguments)
    if opts.host is None:
        parser.error("Hostname (-H) is required.")
    if opts.mode is None:
        parser.error("Mode (--mode) is required.")
    return opts


def set_ipmi_options():
    """Build the ipmitool connection options from the parsed arguments.

    When --ptag is given the password argument is treated as ciphertext and
    decrypted first. Exits through parser.error() if the user name or the
    password is missing.
    """
    global ipmi_options, ipmi_args
    if opts.ipmi_username is None:
        parser.error("--ipmi_username required with ipmitool")
    if opts.ipmi_password is None:
        parser.error("--ipmi_password required with ipmitool")
    if opts.ptag:
        # decrypt() returns bytes on Python 3; ipmitool needs plain text
        password = to_text(decrypt(opts.ipmi_password, opts.ptag))
    else:
        password = opts.ipmi_password
    ipmi_args = ["-I", "lanplus", "-H", opts.host,
                 "-U", opts.ipmi_username, "-P", password]
    ipmi_options = ipmi_options + " -I lanplus -H %s -U %s -P %s " % (opts.host, opts.ipmi_username, password)


def error(errortext):
    """Print an error message and exit with the UNKNOWN status."""
    printf ("* Error: %s" % errortext)
    exit(unknown)


def debug( debugtext ):
    """Print debugtext when --debug was given."""
    if opts is not None and opts.debug:
        printf (debugtext)


def nagios_status( newStatus ):
    """Raise the global exit status to newStatus if it is worse; return it."""
    global exit_status
    exit_status = max(exit_status, newStatus)
    return exit_status


def runCommand(command):
    """runCommand: Runs command from the shell prompt.
    Exit Nagios style if unsuccessful

    command -- either a command string (split on whitespace) or a list of
               arguments (executed as given, so arguments may contain spaces).
    Returns the command's standard output as text.
    """
    if isinstance(command, (list, tuple)):
        cmd_args = list(command)
        command = " ".join(cmd_args)
    else:
        cmd_args = command.split()
    debug( "Executing: %s" % command )
    try:
        proc = subprocess.Popen(cmd_args, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
    except OSError:
        # The executable could not be started at all (e.g. not installed)
        printf ("Check if your path is correct %s" % (getenv("PATH")))
        exit(unknown)
    stdout, stderr = proc.communicate()
    # A negative return code means the process was killed by a signal; that
    # is a failure too, its partial output must not be parsed.
    if proc.returncode == 0:
        return stdout
    if proc.returncode == 1: # timeout
        printf ("server %s is not accessible at the moment" % opts.host)
    debug ("Error %s: %s\n command was: '%s'" % (proc.returncode,stderr.strip(),command))
    debug("results: %s" % (stdout.strip() ) )
    if proc.returncode == 127: # File not found, lets print path
        path=getenv("PATH")
        printf ("Check if your path is correct %s" % (path))
    if stderr.find('Password:') == 0 and command.find('sudo') == 0:
        printf ("Check if user is in the sudoers file")
    if stderr.find('sorry, you must have a tty to run sudo') == 0 and command.find('sudo') == 0:
        printf ("Please remove 'requiretty' from /etc/sudoers")
    exit(unknown)


def end():
    """Print the status line and the long output, then exit with the status."""
    global exit_status
    common_info = get_common_info()
    # Normalise first so that the printed label matches the exit code
    if exit_status < 0:
        exit_status = unknown
    printf ("%s - %s | %s" % (state[exit_status], common_info + summary,perfdata))
    printf (longserviceoutput)
    exit(exit_status)


def add_perfdata(text):
    """Append one performance-data item to the global perfdata string."""
    global perfdata
    text = text.strip()
    perfdata = perfdata + " %s " % (text)


def add_long(text):
    """Append one line to the long service output."""
    global longserviceoutput
    longserviceoutput = longserviceoutput + text + '\n'


def add_summary(text):
    """Append text to the one-line summary."""
    global summary
    summary = summary + text


def ipmiget(raw_cmd):
    """Run ``ipmitool <connection options> <raw_cmd>`` and return its output."""
    return runCommand(["ipmitool"] + ipmi_args + raw_cmd.split())


def format_info(tip_info, item_value):
    """Return "<label><value>; " for every key of tip_info with a value.

    Keys whose value in item_value is the empty string are skipped.
    """
    ret_format_info = ""
    for name in tip_info:
        if "" == item_value[name]:
            continue
        ret_format_info += tip_info[name] + item_value[name].strip() + "; "
    return ret_format_info


def check_psu_fan():
    """
    PSU1 Fan Status: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xA5 0x01
    PSU2 Fan Status: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xA5 0x02

    Reply bytes used: 0-1 fan speed (little endian), 2 duty, 3 status
    (1 = abnormal, 2 = normal, anything else = not present).
    """
    psu_raw_cmd = ["raw 0x32 0xA5 0x01", "raw 0x32 0xA5 0x02"]
    num_ok = 0
    for index, raw_cmd in enumerate(psu_raw_cmd):
        psu = parse_hex_bytes(ipmiget(raw_cmd))
        psu_speed = le16(psu, 0)
        psu_duty = psu[2]
        psu_status = psu[3]
        if 1 == psu_status:
            psu_status_show = "Abnormal"
            nagios_status(critical)
        elif 2 == psu_status:
            psu_status_show = "Normal"
            num_ok = num_ok + 1
        else:
            psu_status_show = "Not Present"
            nagios_status(unknown)
        add_long("PSU Fan%s Speed:%sRPM Duty(%%ofMax.):%s%% Status:%s " % (index+1, psu_speed, psu_duty, psu_status_show) )
        add_perfdata("'PSU Fan%s Speed'=%sRPM; 'PSU Fan%s Speed Duty'=%s%%;" % (index+1, psu_speed, index+1, psu_duty) )
    add_summary("%s out of %s PSU Fan Status are health; " % (num_ok, len(psu_raw_cmd)))
    nagios_status(ok)


def get_total_fan_power_consumption():
    """
    Total fans power: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0x90 0x03

    The first three reply bytes form a little-endian value in units of 0.01 W.
    """
    power_info = parse_hex_bytes(ipmiget("raw 0x32 0x90 0x03"))
    raw_value = power_info[0] + power_info[1] * 256 + power_info[2] * 256 * 256
    # Dividing (instead of multiplying by 0.01) avoids float noise such as
    # 0.07000000000000001 in the output.
    consumption_power = raw_value / 100.0
    add_long("Total Fans Power consumption: %sW" % consumption_power)
    add_perfdata("Total Fans Power consumption=%sW;" % consumption_power)


def check_cooling():
    """
    Cooling probe list: ipmitool -I lanplus -H ip_address -U username -P password sensor | grep FAN_TACH

    Columns used from each '|'-separated sensor line: 0 name, 1 reading,
    3 status, 5 lower critical, 6 lower non-critical.
    """
    cooling_info = ipmiget("sensor ")
    num_ok = 0
    num_all = 0
    add_long("cooling name I status I Reading(RPM) I Lower Critical(RPM) I Lower Non_Critical(RPM)")
    for line in cooling_info.split("\n"):
        if "FAN_TACH" not in line.strip():
            continue
        num_all += 1
        every_cooling = [data.strip() for data in line.split("|")]
        cooling_name = every_cooling[0]
        cooling_reading = every_cooling[1]
        if "ok" == every_cooling[3]:
            cooling_status = "Normal"
            num_ok += 1
        else:
            cooling_status = "Abnormal"
            nagios_status(critical)
        lower_critical = "N/A" if "na" == every_cooling[5] else every_cooling[5]
        lower_non_critical = "N/A" if "na" == every_cooling[6] else every_cooling[6]
        add_long(" %s I %s I %s I %s I %s" % (cooling_name, cooling_status, cooling_reading, lower_critical, lower_non_critical))
        add_perfdata("%s=%sRPM;" % (cooling_name, cooling_reading))
    get_total_fan_power_consumption()
    add_summary("%s out of %s FAN_TACH is ok;" % (num_ok, num_all))
    nagios_status(ok)


def get_nodes_ip_addr():
    """
    Nodes ip address: ipmitool -I lanplus -H ip_address -U username -P password raw 0x34 0x0F
    Return: node1-4 IP list (one dotted address per four reply bytes)
    """
    return group_ipv4(parse_hex_bytes(ipmiget("raw 0x34 0x0F")))


def get_node_power_consumption():
    """
    Node(1-4) Power Consumption: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0x98 0x0(1-4)

    Reply bytes used: 0-1 minimum, 2-3 average, 4-5 maximum (little endian).
    """
    power_raw_cmd = ["raw 0x32 0x98 0x01",
                     "raw 0x32 0x98 0x02",
                     "raw 0x32 0x98 0x03",
                     "raw 0x32 0x98 0x04"]
    for index, raw_cmd in enumerate(power_raw_cmd):
        pwr = parse_hex_bytes(ipmiget(raw_cmd))
        min_w = le16(pwr, 0)
        avg_w = le16(pwr, 2)
        max_w = le16(pwr, 4)
        # NOTE(review): the values are 16 bit wide but the "no data" test
        # compares against 0xff; kept as in the original - confirm the
        # intended sentinel against the BMC specification.
        if min_w == 0xff or max_w == 0xff or avg_w == 0:
            min_w = min_w if min_w < 0xff else "N/A"
            avg_w = avg_w if avg_w < 0xff else "N/A"
            max_w = max_w if max_w < 0xff else "N/A"
        add_long("Node%s Power Consumption: Min.(W):%s Avg.(W):%s Max.(W):%s" % (index+1, min_w, avg_w, max_w) )
        add_perfdata("Node%s Power Consumption(Avg.)=%sW;" % (index+1, avg_w) )


def check_nodes():
    """
    Node1 status: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xA7 0x01
    ...
    Node4 status: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xA7 0x04
    ipaddress : ipmitool -I lanplus -H ip_address -U username -P password raw 0x34 0x0F

    The first reply byte is the power state: 0x00 off, 0x20 no permission,
    0x40 power fault, 0x80 on. Powered-off and powered-on nodes count as OK.
    """
    node_raw_cmd = ["raw 0x32 0xA7 0x01",
                    "raw 0x32 0xA7 0x02",
                    "raw 0x32 0xA7 0x03",
                    "raw 0x32 0xA7 0x04"]
    num_ok = 0
    nodes_ip_list = get_nodes_ip_addr()
    for index, raw_cmd in enumerate(node_raw_cmd):
        node_status = parse_hex_bytes(ipmiget(raw_cmd))
        # Get Node power state
        power_state_int = node_status[0]
        if 0x00 == power_state_int:
            node_power_status = "Power OFF"
            num_ok += 1
        elif 0x20 == power_state_int:
            node_power_status = "No Permission"
            nagios_status(unknown)
        elif 0x40 == power_state_int:
            node_power_status = "Power Fault"
            nagios_status(critical)
        elif 0x80 == power_state_int:
            node_power_status = "Power ON"
            num_ok += 1
        else:
            node_power_status = "Unknown"
            nagios_status(unknown)
        # Get Node IP address (the BMC may report fewer addresses than nodes)
        ip_addr = nodes_ip_list[index] if index < len(nodes_ip_list) else "N/A"
        add_long("Node%s IP address:%s Status:%s" % (index+1, ip_addr, node_power_status))
    get_node_power_consumption()
    add_summary("%s out of %s Nodes is OK;" % (num_ok, len(node_raw_cmd)))
    nagios_status(ok)


def get_psu_status():
    """
    PSU status:ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0x91

    Returns ["Present" | "Not Present"] for PSU1 and PSU2, taken from bit 0
    and bit 1 of the third reply byte.
    """
    presence_bits = parse_hex_bytes(ipmiget("raw 0x32 0x91"))[2]
    psu1_status = "Present" if (presence_bits & 0x01) else "Not Present"
    psu2_status = "Present" if (presence_bits & 0x02) else "Not Present"
    return [psu1_status, psu2_status]


def get_total_psu_power():
    """
    total psu Power: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0x90 0x02

    Reply bytes used: 0-1 minimum, 2-3 average, 4-5 maximum (little endian).
    """
    power_info = parse_hex_bytes(ipmiget("raw 0x32 0x90 0x02"))
    min_power = le16(power_info, 0)
    avg_power = le16(power_info, 2)
    max_power = le16(power_info, 4)
    add_long("Total PSU Power: Min.:%sW Avg.:%sW Max.:%sW" % (min_power, avg_power, max_power))
    add_perfdata("Total PSU Power Min.=%sW;Total PSU Power Avg.=%sW;Total PSU Power Max.=%sW" % (min_power, avg_power, max_power))


def check_psu():
    """
    Ratings/AC-IN/Capability: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xC3 0x01(0x02)

    Reply bytes used: 2-3 AC-IN, 4-5 rating (little endian).
    """
    psu_raw_cmd = ["raw 0x32 0xC3 0x01", "raw 0x32 0xC3 0x02"]
    num_present = 0
    psu_status_list = get_psu_status()
    for index, raw_cmd in enumerate(psu_raw_cmd):
        psu_info = parse_hex_bytes(ipmiget(raw_cmd))
        # Get psu Ratings, AC-IN
        ac_in = le16(psu_info, 2)
        rating = le16(psu_info, 4)
        psu_status = psu_status_list[index]
        if "Present" == psu_status:
            num_present += 1
        add_long("PSU%s status:%s AC-IN:%sV Ratings:%sW" % (index+1, psu_status, ac_in, rating))
    get_total_psu_power()
    add_summary("%s out of %s PSU is Present;" % (num_present, len(psu_raw_cmd)))
    nagios_status(ok)


def check_enclosure_power():
    """
    Enclosure Power: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0x90 0x01

    Reply bytes used: 0-1 minimum, 2-3 average, 4-5 maximum (little endian).
    """
    power_info = parse_hex_bytes(ipmiget("raw 0x32 0x90 0x01"))
    min_power = le16(power_info, 0)
    avg_power = le16(power_info, 2)
    max_power = le16(power_info, 4)
    add_long("Enclosure Power: Min.:%sW Avg.:%sW Max.:%sW" % (min_power, avg_power, max_power))
    add_perfdata("Enclosure Power Min.=%sW; Enclosure Power Avg.=%sW; Enclosure Power Max.=%sW;"% (min_power, avg_power, max_power))
    nagios_status(ok)


def check_voltage():
    """
    Voltage probe list: ipmitool -I lanplus -H ip_address -U username -P password sensor | grep SENSE

    Columns used from each '|'-separated sensor line: 0 name, 1 reading,
    3 status, 5 lower critical, 6 lower non-critical, 7 upper non-critical,
    8 upper critical.
    """
    voltage_info = ipmiget("sensor")
    num_ok = 0
    num_all = 0
    add_long("Voltage name I status I Reading(V) I Lower Critical(V) I Lower Non_Critical(V) I Upper Non_critical(V) I Upper Critical(V)")
    for line in voltage_info.split("\n"):
        if "SENSE" not in line.strip():
            continue
        num_all += 1
        every_volt = [data.strip() for data in line.split("|")]
        volt_name = every_volt[0]
        volt_reading = every_volt[1]
        if "ok" == every_volt[3]:
            volt_status = "Normal"
            num_ok += 1
        else:
            volt_status = "Abnormal"
            nagios_status(critical)
        lower_critical = "N/A" if "na" == every_volt[5] else every_volt[5]
        lower_non_critical = "N/A" if "na" == every_volt[6] else every_volt[6]
        upper_non_critical = "N/A" if "na" == every_volt[7] else every_volt[7]
        upper_critical = "N/A" if "na" == every_volt[8] else every_volt[8]
        add_long(" %s I %s I %s I %s I %s I %s I %s" % (volt_name, volt_status, volt_reading, lower_critical, lower_non_critical, upper_non_critical, upper_critical))
        add_perfdata(" %s=%sV;" % (volt_name, volt_reading))
    add_summary("%s out of %s voltage is ok;" % (num_ok, num_all))
    nagios_status(ok)


def get_common_info():
    """
    MachineType: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xB0 0x05 0x00
    MachineSN: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xB0 0x05 0x01
    MachineMode: ipmitool -I lanplus -H ip_address -U username -P password raw 0x32 0xB0 0x05 0x0B

    Best effort: a field that cannot be read or decoded stays empty, and the
    fields after it are not queried.
    """
    machinetype=""
    machinemode=""
    machinesn=""
    try:
        # Each field is decoded right after it is fetched, so a later failure
        # never leaves undecoded hex text in an earlier field.
        machinetype = hex_to_text(ipmiget("raw 0x32 0xB0 0x05 0x00"))
        machinesn = hex_to_text(ipmiget("raw 0x32 0xB0 0x05 0x01"))
        machinemode = hex_to_text(ipmiget("raw 0x32 0xB0 0x05 0x0B"))
    except (Exception, SystemExit):
        # SystemExit is what runCommand() raises on a failed command; it is
        # caught deliberately so that the check result is still reported.
        debug("Failed in get common info")
    common_info = 'System: "%s"; MTM: "%s"; SN: "%s"; ' % (machinemode.strip(), machinetype.strip(), machinesn.strip())
    return common_info


def decrypt(ciphertext, tag = None):
    """Decrypt a hex-encoded AES-GCM ciphertext and return the plaintext bytes.

    ciphertext -- hex string of the encrypted password
    tag        -- hex string of the GCM authentication tag

    The key comes from get_encryption_key() (first 32 characters) and its
    first 12 bytes serve as IV. An exception is raised when the tag does
    not verify.
    """
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives.ciphers import (
        Cipher, algorithms, modes
    )
    global key
    if key is None:
        key = get_encryption_key()[:32]
    # The cryptography API only accepts byte strings
    aes_key = to_bytes(key)
    iv = aes_key[:12]
    # Construct a Cipher object, with the key, iv, and additionally the
    # GCM tag used for authenticating the message.
    decryptor = Cipher(
        algorithms.AES(aes_key),
        modes.GCM(iv, a2b_hex(tag)),
        backend=default_backend()
    ).decryptor()
    # We put associated_data back in or the tag will fail to verify
    # when we finalize the decryptor.
    decryptor.authenticate_additional_data(to_bytes(associated_data))
    # Decryption gets us the authenticated plaintext.
    # If the tag does not match an InvalidTag exception will be raised.
    return decryptor.update(a2b_hex(ciphertext)) + decryptor.finalize()


# --mode value -> check function
MODE_CHECKS = {
    'power': check_enclosure_power,
    'psu_fan': check_psu_fan,
    'cooling': check_cooling,
    'nodes': check_nodes,
    'psu': check_psu,
    'voltage': check_voltage,
}


def main():
    """Parse the command line, run the selected check and exit Nagios style."""
    parse_arguments()
    check = MODE_CHECKS.get(opts.mode)
    if check is None:
        parser.error("%s is not a valid option for --mode" % opts.mode)
    try:
        set_ipmi_options()
        check()
    except Exception:
        # Any failure (malformed ipmitool reply, decryption error, ...) must
        # end as UNKNOWN; an uncaught traceback would exit with code 1, which
        # Nagios reads as WARNING.
        printf ("Unhandled exception while running script " )
        infor = exc_info()
        debug ("%s : %s" % (infor[0], infor[1]))
        exit(unknown)
    else:
        end()


if __name__ == '__main__':
    main()