#!/bin/sh
# For debugging, run with tracing enabled: sh -x SCRIPT HOST
#
# Script to check the CPU(s) temperature of a Dell PowerEdge server
# (run from a Linux system).
# Written by: Lewis Getschel
# Date: 11/12/04
# Parameters: 1 - the name of the system to check
#             2 - (optional) SNMP v1 community string, defaults to "public"
# Operation:  Check the CPU temperature(s) using snmpwalk.
# Exit codes: 0 = OK, 1 = WARNING (>= 90% of critical), 2 = CRITICAL,
#             3 = UNKNOWN (bad usage, SNMP failure, or unusable data)
# Limitation: According to Nagios Plugin recommendations, I tried to use
#             absolute paths to all commands (sort of).
#             After completing, I realized that this script runs from Linux,
#             but that the Linux servers aren't running the SNMP agents, so I
#             can only check the Windows systems from Linux.
# Version History:
#   11/12/2004 First try, not bad. Had issues with if's comparing decimals, so
#              I truncated by printing only the value before the decimal.
#
# Example output is from Master014 Dell PowerEdge Server, other units ARE
# different!!
#   snmpwalk -Os -c public -v 1 master014 1.3.6.1.4.1.674.10892.1.700.20.1.8
#
# Name         temperatureProbeLocationName
# Object ID    1.3.6.1.4.1.674.10892.1.700.20.1.8
# Description  Defines the location of the temperature probe in this chassis.
#   enterprises.674.10892.1.700.20.1.8.1.3 = STRING: "ESM CPU 1 Temp"
#   enterprises.674.10892.1.700.20.1.8.1.4 = STRING: "ESM CPU 2 Temp"
#
# Name         temperatureProbeUpperCriticalThreshold
# Object ID    1.3.6.1.4.1.674.10892.1.700.20.1.10
# Description  Defines the value of the temperature probes upper critical
#              threshold.
#   enterprises.674.10892.1.700.20.1.10.1.3 = INTEGER: 850
#   enterprises.674.10892.1.700.20.1.10.1.4 = INTEGER: 850
#
# Name         temperatureProbeReading
# Object ID    1.3.6.1.4.1.674.10892.1.700.20.1.6
# Description  Defines the value of the temperature probe.
#              When the value for temperatureProbeType is other than
#              temperatureProbeTypeIsDiscrete, the value returned for this
#              attribute is the temperature that the probe is reading in
#              tenths of degrees Centigrade. When the value for
#              temperatureProbeType is temperatureProbeTypeIsDiscrete, a value
#              is not returned for this attribute.
#   enterprises.674.10892.1.700.20.1.6.1.3 = INTEGER: 330
#   enterprises.674.10892.1.700.20.1.6.1.4 = INTEGER: 350

# Absolute paths to the external commands (Nagios plugin recommendation).
SNMPWALK=/usr/bin/snmpwalk
AWK=/usr/bin/awk

# Columns of the temperature probe table described in the header above.
OID_LOCATION=1.3.6.1.4.1.674.10892.1.700.20.1.8
OID_UPPER_CRITICAL=1.3.6.1.4.1.674.10892.1.700.20.1.10
OID_READING=1.3.6.1.4.1.674.10892.1.700.20.1.6

# Print a Nagios UNKNOWN status line built from the arguments and exit 3.
unknown() {
  echo "UNKNOWN - $*"
  exit 3
}

# Return 0 when $1 is a (possibly negative) decimal integer, 1 otherwise.
# Used to reject empty or garbage SNMP answers before doing arithmetic on them.
is_integer() {
  case "$1" in
    '' | - | *[!0-9-]* | ?*-*) return 1 ;;
    *) return 0 ;;
  esac
}

# Print the probe index (last OID component) of the first location-table line
# that mentions "cpu N", case-insensitively. Prints nothing when not found.
#   $1 - full snmpwalk output of the location column
#   $2 - CPU number to look for
probe_index() {
  printf '%s\n' "$1" | "$AWK" -F= -v want="cpu $2" '
    {
      line = tolower($0)
      pos = index(line, want)
      if (pos == 0) next
      # Reject "cpu 10" when looking for "cpu 1".
      if (substr(line, pos + length(want), 1) ~ /[0-9]/) next
      n = split($1, part, ".")
      idx = part[n]
      gsub(/[^0-9]/, "", idx)
      print idx
      exit
    }'
}

# Query one OID and print the first word after the "TYPE:" marker of the
# first answer line (e.g. "850" for "... = INTEGER: 850").
#   $1 - host, $2 - community, $3 - OID
snmp_integer() {
  "$SNMPWALK" -Os -c "$2" -v 1 "$1" "$3" |
    "$AWK" -F: 'NR == 1 { split($2, word, " "); print word[1] }'
}

# Convert tenths of degrees C to whole degrees F: F = (C * 1.8) + 32.
# Integer arithmetic on the tenths value keeps the fractional degrees C in the
# calculation; only the final result is truncated.
tenths_c_to_f() {
  echo $(( ($1 * 18 + 3200) / 100 ))
}

# Print 90% of $1, truncated to a whole number (the warning threshold).
ninety_percent() {
  echo $(( $1 * 9 / 10 ))
}

main() {
  host=${1:-}
  community=${2:-public}

  [ -n "$host" ] || unknown "usage: ${0##*/} HOST [COMMUNITY]"

  # OK, lets figure out which location (index) are the CPUs.
  # The location column is walked once and searched for both CPUs.
  locations=$("$SNMPWALK" -Os -c "$community" -v 1 "$host" "$OID_LOCATION") ||
    unknown "SNMP query to $host failed"

  index1=$(probe_index "$locations" 1)
  index2=$(probe_index "$locations" 2)
  is_integer "$index1" || unknown "no CPU 1 temperature probe found on $host"
  is_integer "$index2" || unknown "no CPU 2 temperature probe found on $host"

  # Ok, lets get the "upper critical threshold" temperature of the systems
  # CPU's (in tenths of degrees C). The ".1." is the chassis index seen in the
  # example output above.
  upper_critical1=$(snmp_integer "$host" "$community" "$OID_UPPER_CRITICAL.1.$index1")
  upper_critical2=$(snmp_integer "$host" "$community" "$OID_UPPER_CRITICAL.1.$index2")

  # OK, lets get the current temperature of the CPU's themselves
  # (in tenths of degrees C).
  current_cpu1=$(snmp_integer "$host" "$community" "$OID_READING.1.$index1")
  current_cpu2=$(snmp_integer "$host" "$community" "$OID_READING.1.$index2")

  # Without this check an empty or non-numeric answer would make every
  # comparison below fail and the plugin would end silently with status 0.
  for value in "$upper_critical1" "$upper_critical2" \
    "$current_cpu1" "$current_cpu2"; do
    is_integer "$value" ||
      unknown "could not read CPU temperature data from $host"
  done

  # I think in degrees F, I'll convert it...
  current_cpu1=$(tenths_c_to_f "$current_cpu1")
  current_cpu2=$(tenths_c_to_f "$current_cpu2")
  upper_critical1=$(tenths_c_to_f "$upper_critical1")
  upper_critical2=$(tenths_c_to_f "$upper_critical2")

  # Calculate where the 90% warning temp is.
  upper_warning1=$(ninety_percent "$upper_critical1")
  upper_warning2=$(ninety_percent "$upper_critical2")

  # For debugging, the following lines are useful:
  # echo "index for cpu 1 : $index1"
  # echo "index for cpu 2 : $index2"
  # echo "current_cpu1    : $current_cpu1"
  # echo "current_cpu2    : $current_cpu2"
  # echo "upper_critical1 : $upper_critical1"
  # echo "upper_critical2 : $upper_critical2"

  # Now, do the real work on comparing the values, Critical 1st.
  if [ "$current_cpu1" -ge "$upper_critical1" ]; then
    echo "CRITICAL - CPU 1 is $current_cpu1 F. degrees, Over Critical Temperature Limit of $upper_critical1 F. degrees"
    exit 2
  fi
  if [ "$current_cpu2" -ge "$upper_critical2" ]; then
    echo "CRITICAL - CPU 2 is $current_cpu2 F. degrees, Over Critical Temperature Limit of $upper_critical2 F. degrees"
    exit 2
  fi

  # Now, for a Warning at 90% of critical.
  if [ "$current_cpu1" -ge "$upper_warning1" ]; then
    echo "WARNING - CPU 1 is $current_cpu1 F. degrees, Over Warning Temperature Limit of $upper_warning1 F. degrees"
    exit 1
  fi
  if [ "$current_cpu2" -ge "$upper_warning2" ]; then
    echo "WARNING - CPU 2 is $current_cpu2 F. degrees, Over Warning Temperature Limit of $upper_warning2 F. degrees"
    exit 1
  fi

  # Finally, OK: both CPUs are below their warning (and so critical) limits.
  echo "OK - CPU 1 is $current_cpu1 F. degrees, CPU 2 is $current_cpu2 F. degrees"
  exit 0
}

main "$@"