#!/usr/bin/perl

# **************************************************************
# *
# * CHECK_MAIL_LOOP_DELAY
# *
# * Program: Linux plugin for Nagios
# * License: GPL
# * Copyright (c) 2014 - By Victor Ruiz Rivas (vruiz@adif.es)
# *
# * Description:
# *
# * This script is a hybrid of active and passive check plugin.
# * It forks to run a child process which submits final results
# * of the mail loop as passive check through Nagios command file.
# *
# * First the parent process sends a probe message connecting to an
# * MTA server without client authentication and returns immediately
# * to Nagios with the same service state it was given at command
# * line and notifying of the message sent and its timestamp.
# * 
# * The child process detaches from parent process and sleeps for a
# * warning threshold period of time. It then awakes and connects
# * to the mail box server to receive the probe message sent by
# * the parent process and to delete it if it finds it. Otherwise it
# * sleeps again until critical threshold and retries connection
# * and deletion again, submitting results before exiting.
# *
# * Based on the found (or missing) probe within (out of)
# * thresholds, Nagios is notified with "OK" if probe is found after
# * the first connection, "WARNING" if found after the second or
# * "CRITICAL" if not found. Also it is informed of possible delayed
# * messages arrived from previous executions of the plugin.
# * 
# *
# * Nagios main config file options affecting this plugin:
# *
# *    - accept_passive_service_checks=1
# *    - check_external_commands=1
# *    - command_file=<nagios_command_file_path>
# *
# * Service definition directives suggested for this plugin:
# * 
# *    - command_check_interval gt (warning + critical thresholds)[s]
# *    - max_check_attempts=1
# *
# * Possible usage in Nagios check command definition:
# *    
# * command_line    $USER1$/check_mail_loop_delay.pl -H "$HOSTNAME$" \
# *                  -D "$SERVICEDESC$" -S "$SERVICESTATEID$" \
# *                  -A "$ARG1$" -M "$ARG2$" -F "$ARG3$" -T "$ARG4$" \
# *                  -P "$ARG5$" -u "$ARG6$" -p "$ARG7$" -w "$ARG8$" \
# *                  -c "$ARG9$" -f "$ARG10$"    
# *
# *
# * License Information:
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# *
# * $Id: check_mail_loop_delay.pl
# *
# ***********************************************************************



use strict;
use warnings;
use Mail::Sender;
use Net::IMAP::Simple;
use Mail::POP3Client;
use Getopt::Long qw( :config posix_default bundling no_ignore_case );
use Time::HiRes  qw(time);
use Fcntl        qw(:flock);
use POSIX        qw(setsid strftime);

use constant { true => 1, false => 0 };


######## GLOBAL VARS & DEFAULTS #################################

use vars qw( $help $hostname $servicestateid $servicedesc $serviceoutput
             $mta_server $mail_server $timestamp $user_account $pid $tip
             $user_password $mail_from $mail_to $warning_threshold
             $critical_threshold %ERRORS $mail_protocol $subject
             $timeout $delayeds $receive_message $nagios_cmd_file
           );

# Adjust thresholds to your servers work load
$warning_threshold  = 120;      # default 2 minutes warning threshold for receiving messages
$critical_threshold = 420;      # default 7 minutes critical threshold for receiving messages

$timeout   = 30;                # default mta & mail servers response timeout
$subject   = "mail loop test";  # default header subject and body message substring
$timestamp = get_time();        # timestamp signature to label message header's subject

# Return codes for Nagios
%ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);



######## GET CLI ARGUMENTS & Nagios MACROS ####################

GetOptions( "h|help"          => \$help,
            "H|host_name=s"   => \$hostname,             # Nagios $HOSTNAME$ macro value
            "D|svc_descr=s"   => \$servicedesc,          # Nagios $SERVICEDESCR$ macro value
            "S|svc_state=i"   => \$servicestateid,       # Nagios last check $SERVICESTATEID$ macro value
            "A|mta_srv=s"     => \$mta_server,           # smtp server for sending messages
            "M|mail_srv=s"    => \$mail_server,          # imap or pop3 mail server
            "F|mail_from=s"   => \$mail_from,            # MAIL from: sender account
            "T|mail_to=s"     => \$mail_to,              # RCPT to: recipient account
            "P|prot=s"        => \$mail_protocol,        # protocol to use: pop3 or imap
            "u|user=s"        => \$user_account,         # mail account user
            "p|passwd=s"      => \$user_password,        # mail account password
            "w|warn:i"        => \$warning_threshold,    # optional loop warning time(seconds) threshold
            "c|crit:i"        => \$critical_threshold,   # optional loop critical time(seconds) threshold
            "s|subject:s"     => \$subject,              # optional subject string
            "f|cmd_file=s"    => \$nagios_cmd_file,      # Nagios command file for passive check submission
            "t|timeout:i"     => \$timeout               # optional timeout for connecting to servers

          )  or die("Error in command line arguments: $!\n");     

usage() unless check_options();


######################################################################################
######################  SINGLE NAGIOS SERVICE CHECK INSTANCE RUNNING  ################

# Due to the hybrid nature of this plugin, which runs two paired processes (parent: send,
# child: receive), some combinations of check interval and thresholds can lead to more
# than one plugin instance running concurrently for the same service check. One instance
# could then delete probe messages sent by another instance of the same service check
# and yield distorted results, so concurrency between service checks is avoided here.
# As a consequence this script cannot check more than one mail loop at a time. Should
# you ever need to check different mail loops, I suggest commenting out the next "flock"
# block and the final "__DATA__" block and defining safe service check intervals and
# thresholds for the services as suggested above, or just creating a copy of this script,
# renamed for each account/loop to test.

unless (flock(DATA, LOCK_EX|LOCK_NB)){

   # One instance already running, exit and warn Nagios

   return_to_nagios(" (*) can't flock $0: another plugin instance may be running! $!\n", $ERRORS{'WARNING'});
}

###########################################################
###########################################################
################## CHECK MAIL LOOP ########################

# System will reap automatically orphan child processes
$SIG{CHLD} = 'IGNORE';

# flush buffers before forking and detach
$| = 1;

defined ( $pid = fork()) or
   # could not fork
   return_to_nagios("fork error: $!\n", $ERRORS{'UNKNOWN'});

if ($pid) {
   # I'm the parent
   send_probe_message();
   return_to_nagios("message sent to $user_account with $mta_server at $timestamp\n", $servicestateid);

}else{
   # Detach child process 
   setsid() or die "setsid can't start a new session: $!\n";
   open STDIN, "< /dev/null" or die "can't read /dev/null: $!\n";
   open STDOUT, ">> /dev/null" or die "can't write /dev/null: $!\n";
   open STDERR, ">> /dev/null" or die "can't write /dev/null: $!\n";
   open (CMDFILE, ">> $nagios_cmd_file") or die "can't open $nagios_cmd_file: $!\n"
}


# SECOND PART OF THE MAIL LOOP: retrieve, delete messages
# and submit results to Nagios.  


# Set proper mail protocol retrieving function reference
if ( $mail_protocol eq "pop3" ) {
   $receive_message = \&receive_pop3_message;
}else{
   $receive_message = \&receive_imap_message;
}

# First sleep for warning threshold wait
sleep($warning_threshold);

if ( $receive_message->() ) {

   $serviceoutput = "OK: mail message sent to '$mail_to' with MTA '$mta_server' server, received in '$mail_server' server in less than $warning_threshold seconds since $timestamp";
   $servicestateid = $ERRORS{'OK'};

}else{

   # Second sleep for critical threshold wait
   sleep($critical_threshold - $warning_threshold);

   if ( $receive_message->() ) {
      $serviceoutput = "WARNING: mail message sent to <b>'$mail_to'<\/b> with MTA <b>'$mta_server'<\/b> server, received in <b>'$mail_server'<\/b> server, took more than <b>$warning_threshold<\/b> and less than <b>$critical_threshold<\/b> seconds since <b>$timestamp<\/b>";
      $servicestateid = $ERRORS{'WARNING'};
   }else{
      $serviceoutput = "CRITICAL: mail message sent to <b>'$mail_to'<\/b> with MTA <b>'$mta_server'<\/b> server not yet received in <b>'$mail_server'<\/b> server after <b>$critical_threshold<\/b> seconds elapsed since <b>$timestamp<\/b>";
      $servicestateid = $ERRORS{'CRITICAL'};
   };
};

if ($delayeds > 0) {
   $serviceoutput =  "$serviceoutput - received <b>$delayeds<\/b> delayed message(s)";
};

submit_to_nagios($serviceoutput, $servicestateid);


################ END MAIL LOOP CHECK ######################
###########################################################
###########################################################


###########################################################
###########################################################
################## FUNCTIONS ##############################


# Sends probe message (from the parent process)
sub send_probe_message {

   my $s = $subject." ".$timestamp;

   my $sender = new Mail::Sender{
                                  smtp      => $mta_server,
                                  from      => $mail_from,
                                  to        => $mail_to,
                                  subject   => $s,
                                  timeout   => $timeout,
                                  on_errors => undef };

   unless ( ref $sender ) {
      return_to_nagios("error creating mail sender object: $!\n", $ERRORS{'CRITICAL'});
   }

   $sender->Open()
      or return_to_nagios("error opening message to $mta_server: $!\n", $ERRORS{'CRITICAL'});

   $sender->SendLineEnc($s)
      or return_to_nagios("error writing message to $mta_server: $!\n", $ERRORS{'CRITICAL'}); 

   $sender->Close()
      or return_to_nagios("error closing and sending message to $mta_server: $!\n", $ERRORS{'CRITICAL'});

}
#####################################


# Receives and deletes sent probe message(s) from imap server user inbox folder
sub receive_imap_message {

   $delayeds = 0;
   my $received = false;

   # Create the imap client object
   my $imap = Net::IMAP::Simple->new( $mail_server ) ||
      submit_to_nagios("Unable to establish connection to $mail_server server: $!\n", $ERRORS{'CRITICAL'});

   # Log on
   if( ! $imap->login( $user_account, $user_password ) ){
      submit_to_nagios("Login $user_account user to $mail_server failed: $imap->errstr\n", $ERRORS{'CRITICAL'});
   }

   my $total_msgs = $imap->select('INBOX');

   # examine each message 
   for(my $i = 1; $i <= $total_msgs; $i++){
      # process each message header line until Subject match
      foreach( $imap->top( $i ) ) {
         if ( /^Subject:\s+$subject/i ) {
            if ( /^Subject:\s+$subject\s+$timestamp/ ) {
               # this message was sent by the parent of this process
               $received = true;
            }else{
               # this maybe a delayed message fom other process
               $delayeds ++;
            }
            $imap->delete($i);
            last; # go for next message
         };
      };

   };
   $imap->quit();
   return($received);
};
#####################################


# Receives and deletes sent probe message(s) from pop3 server
sub receive_pop3_message {

   $delayeds = 0;
   my $received = false;

   my $pop = new Mail::POP3Client( USER     => $user_account,
                                   PASSWORD => $user_password,
                                   HOST     => $mail_server );

   if ( $pop->Count() == -1 ) { # There was an error trying to connect to POP3 server
      submit_to_nagios("Unable to establish connection to $mail_server server: $!\n", $ERRORS{'CRITICAL'});
   }

   # examine each message
   for( my $i = 1; $i <= $pop->Count(); $i++ ) {
      # process each message header line until Subject match
      foreach( $pop->Head( $i ) ) {
         if ( /^Subject:\s+$subject/i ) {
            if ( /^Subject:\s+$subject\s+$timestamp/ ) {
               # this message was sent by the parent of this process
               $received = true;
            }else{
               # this maybe a delayed message fom other process
               $delayeds ++;
            }
            $pop->Delete($i);
            last; # go for next message
         };
      };

   };
   $pop->Close();
   return($received);
};
#####################################


# Check options received through command line
sub check_options {

   my $ret = false;

   if ( $help ){
      $tip = "(*) usage ...";
   }elsif ( not $critical_threshold > $warning_threshold ){
      $tip = "(*) critical threshold must be greater than warning threshold ...";
   }elsif( !$hostname or !$servicedesc or ! defined $servicestateid ){
      $tip = "(*) missing some Nagios MACRO(s) argument(s) ...";
   }elsif( !$mta_server or !$mail_server ){
      $tip = "(*) missing mta and/or mail server(s) ...";
   }elsif( !$mail_from or !$mail_to ){
      $tip = "(*) missing mail_from and/or mail_to argument(s) ...";
   }elsif( !$mail_protocol or ( $mail_protocol ne "pop3" and $mail_protocol ne "imap") ){
      $tip = "(*) missing or unknown mail protocol (pop3|imap) argument ...";
   }elsif( !$user_account or !$user_password ){
      $tip = "(*) missing mail user account credentials ...";
   }elsif( !$nagios_cmd_file ){
      $tip = "(*) missing nagios command file argument ...";
   }else{
      $ret = true;
   };
   return $ret;
}
######################################


# Return to Nagios (only from parent process)
sub return_to_nagios {

   my($plugin_output, $return_code) = @_;
   print $plugin_output;
   exit($return_code);
};
#####################################


#  Write service output to Nagios command file (only from child process)
sub submit_to_nagios {

   my ( $plugin_output, $return_code ) = @_;
   my $submit_time = time;

   my $cmd_line = "[".$submit_time."] PROCESS_SERVICE_CHECK_RESULT;".$hostname.";".$servicedesc.";".$return_code.";".$plugin_output;
   print CMDFILE $cmd_line;
   close(CMDFILE);
   exit($return_code);
};
#####################################


# Get human readable timestamp 
sub get_time {

    my $unix_time = time;
    my $date = strftime "%Y-%m-%d %H:%M:%S", localtime($unix_time);
    $date .= sprintf ".%05d", ($unix_time-int($unix_time))*100000;
    return $date;    
};
#####################################


sub usage {

print <<EOH;
  $tip

  Synopsis:  $0 [-h] -H <\$HOSTNAME\$> -D <\$SERVICEDESC\$> -S <\$SERVICESTATEID\$> -A <mta server> -M <pop3 or imap server> -F <mail sender> -T <mail recipient> -P <pop3|imap protocol> -u <mail user> -p <mail password> -w <warning threshold> -c <critical threshold> -s <header subject string> -f <nagios command file> -t <timeout>
  
  -h|--help        prints this help
  -H|--host_name   Nagios host name macro \$HOSTNAME\$ expected
  -D|--svc_descr   Nagios service description macro \$SERVICEDESC\$ expected
  -S|--svc_state   Nagios service state ID macro \$SERVICESTATEID\$ expected
  -A|--mta_srv     smtp server FQDN or IP address 
  -M|--mail_srv    mail server FQDN or IP address
  -F|--mail_from   MAIL from: sender account
  -T|--mail_to     RCPT to: recipient account
  -P|--prot        imap or pop3 mail server protocol
  -u|--user        mail account user
  -p|--passwd      mail account password
  -w|--warn        optional warning time(seconds) threshold
  -c|--crit        optional critical time(seconds) threshold
  -s|--subject     optional subject string
  -f|cmd_file      Nagios command file for passive check result submission
  -t|timeout       optional timeout(seconds) for mta & mail servers response

EOH

exit($ERRORS{'WARNING'});
}



__DATA__
This exists so flock() code above works.
DO NOT REMOVE THIS DATA SECTION.
