#!/usr/bin/python

"""Check disk service time.

Run iostat on a device and check its service time.  Warn if it is above the
specified threshold.
"""

__author__ = 'Tucker <junk^at^gmail^dot^com>'


import commands
import os
import re
import stat
import sys
from optparse import OptionParser

# Globals
# Absolute path of the iostat executable that main() runs for each device.
IOSTAT_BIN = '/usr/bin/iostat'


def get_block_devs():
  """Parse /etc/mtab for mounted devices that live under /dev/.

  Args:
    None

  Returns:
    devices: list of device paths (the first whitespace-separated field of
      every /etc/mtab line that starts with '/dev/'), in file order and
      without duplicates

  Exits with status 3 (after printing a message) if /etc/mtab cannot be
  opened.
  """
  # If we don't have read access to /etc/mtab, we need to warn and bail.
  try:
    mtab_fh = open('/etc/mtab', 'r')
  except IOError:
    print('Could not open /etc/mtab.')
    sys.exit(3)

  devices = []
  # try/finally guarantees the handle is released even if reading fails.
  try:
    for line in mtab_fh:
      if not line.startswith('/dev/'):
        continue
      # The device path is the first 'word' on a matching line.
      device = line.split()[0]
      # The same device can be listed on several lines (one per mount
      # point); report it only once so it is not checked repeatedly.
      if device not in devices:
        devices.append(device)
  finally:
    mtab_fh.close()

  return devices


def output_and_exit(messages, code=0):
  """Print every status message, then terminate the process.

  Args:
    messages: list of status messages; each one is printed on its own line,
      in list order
    code: integer exit status handed to sys.exit (defaults to 0)

  Returns:
    None; this function never returns because sys.exit raises SystemExit.
  """
  # Emit the messages first so they are visible before the process ends.
  for status_line in messages:
    print(status_line)

  sys.exit(code)


def _parse_service_time(iostat_output):
  """Pull the service-time field out of the text printed by iostat -x.

  Args:
    iostat_output: string holding the complete captured iostat output

  Returns:
    The service-time value exactly as it appears in the output (a string).

  Raises:
    IndexError: the output has fewer lines or columns than expected
  """
  lines = iostat_output.split('\n')

  # Layout assumption: the device's statistics row is the second-to-last
  # line of the captured output.
  fields = lines[-2].split()

  # Position 10 is used when no header names the column.  When a header row
  # contains 'svctm', trust its position instead of the fixed index.
  # NOTE(review): column order presumably differs between iostat versions;
  # confirm against the versions actually deployed.
  column = 10
  for line in lines:
    header = line.split()
    if header and header[0].startswith('Device') and 'svctm' in header:
      column = header.index('svctm')

  return fields[column]


def main():
  """Check service time on the specified block device (or all of them).

  Command-line options:
    -w/--warn: warning threshold, integer (default 15)
    -c/--crit: critical threshold, integer (default 20)
    -d/--dev: device to check; the default 'ALL' checks every device
      returned by get_block_devs()

  Runs 'iostat -x' on each selected device, compares the reported service
  time with the thresholds and prints one status line per device.

  Exit status:
    0: every device is below the warning threshold
    1: at least one device reached the warning threshold (none critical)
    2: at least one device reached the critical threshold
    3: the requested device is missing or is not a block device, or iostat
       failed or produced output that could not be parsed
  """
  # Setup our flags.
  flags = OptionParser()
  flags.add_option('-w', '--warn', dest='warning', type='int',
                   default=15, help='Warning threshold', metavar='INT')
  flags.add_option('-c', '--crit', dest='critical', type='int',
                   default=20, help='Critical threshold', metavar='INT')
  flags.add_option('-d', '--dev', dest='device', type='string',
                   default='ALL', help='Device to check', metavar='DEV')
  (opts, dummy) = flags.parse_args()

  # Assign options.
  dev = opts.device

  # Compare by value, not identity: a string that arrives from the command
  # line is not guaranteed to be the same object as the literal 'ALL'.
  if dev != 'ALL':
    # Only the lstat call can raise OSError, so keep the try body minimal.
    try:
      dev_st_mode = os.lstat(dev).st_mode
    except OSError:
      # Bail if the device isn't found at all.
      print('%s not found.' % dev)
      sys.exit(3)
    # Bail if the device specified isn't a block device.
    if not stat.S_ISBLK(dev_st_mode):
      print('%s is not a valid block device.' % dev)
      sys.exit(3)
    # We'll need to be able to iterate over this later.
    all_devices = [dev]
  else:
    all_devices = get_block_devs()

  # Default our warning levels and messages.
  warn = False
  crit = False
  messages = []

  # Loop through all our devices and set any alert levels and messages.
  for device in all_devices:
    status, output = commands.getstatusoutput(
        '%s -x %s' % (IOSTAT_BIN, device))

    # A failed iostat run must not be parsed: an uncaught exception would
    # end the script with status 1, which is this script's 'warning' code.
    if status != 0:
      print('iostat failed for %s: %s' % (device, output))
      sys.exit(3)

    try:
      dev_svctm = _parse_service_time(output)
      svctm_value = float(dev_svctm)
    except (IndexError, ValueError):
      print('Could not parse iostat output for %s.' % device)
      sys.exit(3)

    # Check our critical threshold.
    if svctm_value >= opts.critical:
      messages.append('Device service time on %s critical: %sms' %
                      (device, dev_svctm))
      crit = True
    # Check our warning threshold.
    elif svctm_value >= opts.warning:
      messages.append('Device service time on %s warning: %sms' %
                      (device, dev_svctm))
      warn = True
    # Say things are fine.
    else:
      messages.append('Device service time on %s within range: %sms' %
                      (device, dev_svctm))

  # Exit, outputting our messages, based on the most extreme level.
  if crit:
    output_and_exit(messages, 2)
  elif warn:
    output_and_exit(messages, 1)
  else:
    output_and_exit(messages)


# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
