#!/usr/bin/perl

##################################################################################################################
# Description : Check Nutanix Cluster by SSH
# Date : 19 September 2016
# Author : Fabrice LE DORZE  
# Licence : GPL - http://www.fsf.org/licenses/gpl.txt
#
# Date :    21 august 2017
# Author :  Emmanuel PELERIN
# Features: Add check for Nutanix Metro replications, 
#           Improve code
#
# Date :    21 june 2021
# Author : Fabrice LE DORZE  
# Features: cleanup .libnet-openssh-perl socket file if any
#	    cleanup registered SSH host keys 
#
##################################################################################################################

use strict;
use Net::OpenSSH;
use Getopt::Long;
use Date::Parse;
use Data::Dumper;

# Name of this script without its directory part, shown in the usage message.
# It is derived in Perl instead of running `basename $0` through a shell: the
# backtick output kept its trailing newline (which split the "Usage:" line in
# two) and $0 was interpolated unquoted into a shell command line.
(my $PROGNAME = $0) =~ s{^.*/}{}s;

# Nagios plugin exit codes
my $STATE_OK=0;
my $STATE_WARNING=1;
my $STATE_CRITICAL=2;
my $STATE_UNKNOWN=3;

#-----------------------------------------------------
# Usage function
#-----------------------------------------------------
# Print the one-paragraph command line synopsis on STDOUT.
# Takes no argument and returns the result of print.
# The synopsis lists every option declared in the GetOptions call of this
# script; -H, -u, -p and -T are shown as mandatory because the script refuses
# to run without them, and "replications" is listed among the tests.
sub Print_Usage() {
	print <<USAGE;

Usage: $PROGNAME -H <host> -u <user> -p <password> -T <status|alerts|replications>
       [-c <criticity_code>] [-r <regexp> [-e]] [-a <max_age>] [-P <prompt>] [-t <timeout>] [-d] [-h]

USAGE
}

#-----------------------------------------------------
# Help function
#-----------------------------------------------------
# Print the full help (description, usage, option list, example) on STDOUT,
# then terminate the script with the UNKNOWN exit code.
# Takes no argument and never returns.
# Text fixes compared with the previous version: the default of -c is 2
# (CRITICAL), which is what the script applies when -c is not given; the
# example uses -T (there is no -C option); the stray "i" after "alerts." is gone.
sub Print_Help() {
	print <<HELP;

This plugin executes different checks on Nutanix Clusters thanks to CLI command by SSH
HELP
	Print_Usage;
	print <<HELP;
Options :	
	-H <hostname> : the hostname.
	-u <user> :  user to connect to the host.
	-p <password> :  password to connect to the host.
        -T <test> : test to execute. Maybe status, alerts, replications.
        -c <criticity> : Nagios status. Default is 2 (CRITICAL) 
        -r <regexp> : select only items matching the regular expression
        -e : exclude items matching the regular expression above
        -a <age in second>: for alerts test, max age of alerts. Default is 300.
        -P <prompt> : prompt to wait for once connected. Default is '<.*>';
        -t <timeout> : timeout. Default is 10s
        -d : debug mode


 Example :
$0  -H cluster -u admin -p toto -T status -r snmpd -c 2

        
HELP
	exit $STATE_UNKNOWN;
}

#-----------------------------------------------------
# Print debug
#-----------------------------------------------------
# Dump the SSH log file on STDOUT when debug mode is on.
#   $debug : true to dump the file, false to do nothing.
# The file name is read from the package variable $main::input_log.
# Nothing is printed when that variable is not set or the file cannot be
# opened.
sub Debug
{
    my $debug=shift;
    return unless ($debug and defined $::input_log);

    # 3-argument open on a lexical handle: the file name can never be taken
    # for an open mode, and no global DEBUG handle is left behind.
    open(my $log, '<', $::input_log) or return;
    while (my $entry=<$log>)
    {
        print $entry;
    }
    close $log;
}

#-----------------------------------------------------
# Get user-given variables
#-----------------------------------------------------
# Command line options (see Print_Help for their meaning)
my ($help, $host, $user, $password, $timeout, $test, $prompt, $regexp, $exclude, $max_age, $criticity, $debug);
Getopt::Long::Configure ("bundling");
GetOptions (
'H=s' => \$host,
'u=s' => \$user,
'p=s' => \$password,
'T=s' => \$test,
'c=s' => \$criticity,
'r=s' => \$regexp,
'e' => \$exclude,
'a=s' => \$max_age,
'd' => \$debug,
't=s' => \$timeout,
'P=s' => \$prompt,
'h' => \$help
);

($help) and Print_Help;

# Each failed check prints a short reason, then the help, and exits UNKNOWN
# (Print_Help does not return).
unless ($host && $user && $password && $test)
{
    print "\nOption missing.\n";
    Print_Help;
}

# The test name must be exactly one of the keys of %commands below. The
# pattern is anchored: an unanchored match accepted e.g. "mystatus", for
# which no command is defined.
unless ($test =~ /^(?:status|alerts|replications)$/)
{
    print "\nBad test.\n";
    Print_Help;
}

# -a is optional: when it is missing the default below applies. The former
# check rejected "-T alerts" without -a, which made that default unreachable.
# When given, it must be a number of seconds since it is compared numerically
# with the age of the alerts.
if (defined $max_age and $max_age !~ /^\d+$/)
{
    print "\nBad max age.\n";
    Print_Help;
}

# -c is used both as an index in @ERRORS and as exit code
if (defined $criticity and $criticity !~ /^[0-3]$/)
{
    print "\nBad criticity.\n";
    Print_Help;
}

# -r is used as a regular expression: refuse it now if it does not compile
# rather than dying in the middle of the check.
if (defined $regexp and !eval { my $compiled=qr/$regexp/; 1 })
{
    print "\nBad regular expression.\n";
    Print_Help;
}

# Defaults, labels of the Nagios states, and the remote command of each test
my @ERRORS=('OK','WARNING','CRITICAL','UNKNOWN');
$criticity=$STATE_CRITICAL unless ($criticity);
$max_age=300 unless ($max_age);
my %commands =
(
status => '/usr/local/nutanix/cluster/bin/cluster status',
alerts => '/home/nutanix/prism/cli/ncli alerts ls',
replications => '/home/nutanix/prism/cli/ncli protection-domain ls'
);

#-----------------------------------------------------
# Cleanup host key
#-----------------------------------------------------
#
# Remove any registered key of $host from the known_hosts file.
# ssh-keygen is started with the list form of a pipe open, so no shell is
# involved and $host (which comes from the command line) cannot inject shell
# commands. As with the former backticks, its standard output is read and
# thrown away, and $? holds its exit status after the close.
if (open(my $keygen, '-|', 'ssh-keygen', '-f', '/var/lib/naemon/.ssh/known_hosts', '-R', $host))
{
    my @discarded=<$keygen>;
    close $keygen;
}

#-----------------------------------------------------
# Execute command
#-----------------------------------------------------
my $code=$STATE_OK;
$timeout=10 unless $timeout;
$prompt="<.*>" unless ($prompt);
my @a=getpwuid($<);
my $whoami=$a[0];
our $input_log="/tmp/ssh.$$";

# Connect
my @opts=('-o' => 'StrictHostKeyChecking no','-q');
if ($debug)
{
      # The debug variable of the module is $Net::OpenSSH::debug; the former
      # $Net::ssh::debug was a different, unused variable, so the flag was
      # never set.
      $Net::OpenSSH::debug |= 16;
      push @opts, '-v';
}
my %params = ( 'user'=>$user, ssh_cmd=>'/usr/bin/ssh', timeout=>$timeout, master_opts => \@opts);
$params{'password'}=$password if ($password);
my $ssh = Net::OpenSSH->new($host,%params);

# Only the error of the SSH object tells whether the connection succeeded.
# $? is deliberately not tested any more: nothing visible here shows that
# new() sets it, and the last statement above that does is the ssh-keygen call.
# A connection failure is reported as UNKNOWN.
# NOTE(review): the former comment said CRITICAL (see GLPI n°45571) while the
# code has always returned UNKNOWN - confirm which one is wanted.
if (!$ssh or $ssh->error)
{
    print $ERRORS[$STATE_UNKNOWN]. " : ".($ssh ? $ssh->error : "no SSH object")."\n";
    exit $STATE_UNKNOWN;
}

# Execute command: $result is its standard output, $error its standard error
my ($result,$error)=$ssh->capture2($commands{$test});
my @results=split(/\n/,$result);

unless ($result)
{
    # Say why nothing came back when it is known: the SSH error if any,
    # else what the command wrote on its standard error.
    my $reason=$ssh->error ? "".$ssh->error : $error;
    $reason="" unless (defined $reason);
    $reason=~s/\s+/ /g;
    $reason=~s/^ | $//g;
    print "UNKNOWN : no result".($reason ne "" ? " ($reason)" : "")."\n";
    exit $STATE_UNKNOWN;
}
print $result if ($debug);

#-----------------------------------------------------
# Close Connexion
#-----------------------------------------------------
#kill 9, $ssh->get_master_pid;

#-----------------------------------------------------
# Parse command result
#-----------------------------------------------------
# Cleanup
# Normalise every output line in place: drop the CR and LF characters, then
# squeeze each run of whitespace into a single space.
foreach my $entry (@results)
{
    $entry=~tr/\r\n//d;
    $entry=~s/\s+/ /g;
}

# Working variables shared by the tests below
my (%faults, @defaults);
my ($line, $state, $cvm);

# Label of each test
my %comments=
(
    'status'       => "Cluster Status",
    'alerts'       => "Alerts since ".$max_age,
    'replications' => "Replication Status",
);

# Results of the status test
my (@failed_services, @unknown_services, @ok_services);
# Results of the alerts test
my (@warning_alerts, @critical_alerts, @alert_details, %alerts, $perfs);
# Results of the replications test
my (@disabled_replications, @replications_details, %replications);
# Lines printed after the status line
my @details;

# Cluster services status
#
# The output is read as a sequence of sections. A section starts with a line
# matching "CVM: <ip> <state>" and ends with an empty line, with any other
# line containing "CVM:", or with the end of the output. Inside a section,
# lines of the form "<service> <state> [" describe one service.
#
# Differences with the previous shift-based parser:
#  - a CVM header found on the very last line is no longer dropped;
#  - a line that does not parse as a service is still shown in the details
#    but is no longer reported as a failed service with an empty name;
#  - a CVM without any retained line is detected by counting the lines
#    instead of matching them against /$regexp/, which without -r is the
#    empty pattern (Perl then reuses the last successful pattern).
if ($test eq "status")
{
    my %is_listed;       # service names already pushed to @ok_services
    my $current_cvm;     # IP of the CVM being read, undef outside a section
    my @cvm_details;     # lines retained for the current CVM

    # End the section of the current CVM, if any: flag the CVM when no line
    # was retained for it (unless -e is used), then append its lines and an
    # empty separator to the details.
    my $close_section = sub
    {
        return unless (defined $current_cvm);
        push @unknown_services, $current_cvm unless (@cvm_details or $exclude);
        push @details, @cvm_details, "";
        undef $current_cvm;
        @cvm_details=();
    };

    foreach my $entry (@results)
    {
        # Header line: a new CVM section starts
        if (my ($ip)=($entry=~/CVM: (\d+\.\d+\.\d+\.\d+) (.*)/))
        {
            $close_section->();
            $current_cvm=$ip;
            push @details, $entry;
            next;
        }

        # Skip to next CVM
        next unless (defined $current_cvm);

        # End of the section
        if (!$entry or $entry=~/CVM:/)
        {
            $close_section->();
            next;
        }

        # Parse services of that CVM
        print "$entry\n" if ($debug);
        if ($regexp)
        {
            next if ($entry!~/$regexp/ and !$exclude);
            next if ($entry=~/$regexp/ and $exclude);
        }
        push @cvm_details, $entry;

        my ($service,$service_state)=($entry=~/(\w+)[\s\t]+(\w+)[\s\t]+\[/);
        next unless (defined $service);

        push @failed_services, $service." on CVM ".$current_cvm if ($service_state !~/UP/i);
        push @ok_services, $service unless ($is_listed{$service}++);
    }
    $close_section->();

    if (@failed_services)
    {
        print "$ERRORS[$criticity], Faulty " . $comments{$test} . ", failed services : " . join(", ",@failed_services).". See details.";
        $code=$criticity
    }
    elsif (@unknown_services)
    {
        # Without -r there is no expression to quote in the message
        my $what=$regexp ? "no service matching '$regexp'" : "no service found";
        print "$ERRORS[1], $what on CVMs : " . join(", ",@unknown_services).". See details.";
        $code=$STATE_WARNING;
    }
    else
    {
        print $ERRORS[0] . ", all services (".join(", ",@ok_services). ") are UP on all CVMs, see details.";
        $code=$STATE_OK;
    }
}

# last alerts
#
# The output is read as a sequence of records. A record starts with a line
# matching "ID... : " and ends with an empty line, with the next such line,
# or with the end of the output. Every "key : value" line of a record is
# stored in $alerts{<id>}.
#
# Differences with the previous version:
#  - acknowledged or resolved alerts are really skipped: these two filters
#    used to run after the alert had been counted, so they had no effect.
#    They only apply when the field is present and not empty.
#    NOTE(review): assumes ncli prints "Acknowledged" and "Resolved" as
#    true/false for every alert - confirm against real output.
#  - the key of a line stops at the first " : ", so a value containing " : "
#    no longer ends up in the key;
#  - an alert whose ID is on the very last line is no longer dropped;
#  - both counters of the performance data are always filled in;
#  - alerts are processed in sorted ID order, so the output is stable.
elsif ($test eq "alerts")
{
    my %lines_of;       # alert ID => raw lines of the alert, for the details
    my $current_id;     # ID of the alert being read, undef outside a record

    # Parse output to build a hash table of alerts
    foreach my $entry (@results)
    {
        if ($entry=~/ID.* : /)
        {
            # First line of a record
            ($current_id)=($entry=~/ID : (.*)/);
            $current_id="" unless (defined $current_id);
            $alerts{$current_id} ||= {};
            $lines_of{$current_id}=[];
        }
        elsif (!$entry)
        {
            # An empty line ends the record
            undef $current_id;
        }

        # Skip to next ID
        next unless (defined $current_id);

        push @{$lines_of{$current_id}}, $entry;
        my ($key,$value)=($entry=~/ +(.*?) +: +(.*)/);
        $alerts{$current_id}->{$key}=$value if (defined $key);
    }

    # Loop on alerts hash table
    foreach my $id (sort keys %alerts)
    {
        my $alert=$alerts{$id};

        # Message: apply the -r / -e filter
        if ($regexp)
        {
            next if ($alert->{'Message'}!~/$regexp/ and !$exclude);
            next if ($alert->{'Message'}=~/$regexp/ and $exclude);
        }

        # Details: every alert passing the filter is listed, whatever its age
        push @details, "", @{$lines_of{$id}};

        # Created On: skip alerts without a readable date or older than -a
        my $created=defined $alert->{'Created On'} ? str2time($alert->{'Created On'}) : undef;
        next unless (defined $created);
        next if (time()-$created>$max_age);

        # Severity
        my $severity=$alert->{'Severity'};
        next unless (defined $severity and $severity=~/warning|critical/i);

        # Acknowledgement and resolution
        my $handled=0;
        foreach my $flag ('Acknowledged','Resolved')
        {
            my $value=$alert->{$flag};
            $handled=1 if (defined $value and $value ne "" and $value!~/false/i);
        }
        next if ($handled);

        push @warning_alerts, $id if ($severity=~/warning/i);
        push @critical_alerts, $id if ($severity=~/critical/i);
    }

    my $nbc=scalar @critical_alerts;
    my $nbw=scalar @warning_alerts;
    if ($nbc)
    {
        print "$ERRORS[$criticity], found ".$nbc." critical alerts since last $max_age seconds. See details.";
        $code=$criticity
    }
    elsif ($nbw)
    {
        print "$ERRORS[1], found ".$nbw." warning alerts since last $max_age seconds. See Details.";
        $code=$STATE_WARNING;
    }
    else
    {
        print $ERRORS[0] . ", no alerts found since last $max_age seconds.";
        $code=$STATE_OK;
    }
    $perfs="warning_alerts=".$nbw." critical_alerts=".$nbc;
}
# check replication status
#
# The output is read as a sequence of records. A record starts with a line
# matching "Protection Domain... : " and ends with an empty line, with the
# next such line, or with the end of the output. Every "key : value" line of
# a record is stored in $replications{<name>}.
#
# Differences with the previous version:
#  - a protection domain whose header is on the very last line is no longer
#    dropped;
#  - the key of a line stops at the first " : ";
#  - the status returned for disabled replications follows -c, like the
#    other tests (the default of -c is CRITICAL, so the default result is
#    unchanged);
#  - protection domains are processed in sorted order, so the output is stable.
elsif ($test eq "replications")
{
    my %lines_of;       # protection domain => raw lines, for the details
    my $current_id;     # protection domain being read, undef outside a record

    foreach my $entry (@results)
    {
        if ($entry=~/Protection Domain.* : /)
        {
            # First line of a record
            ($current_id)=($entry=~/Protection Domain : (.*)/);
            $current_id="" unless (defined $current_id);
            $replications{$current_id} ||= {};
            $lines_of{$current_id}=[];
        }
        elsif (!$entry)
        {
            # An empty line ends the record
            undef $current_id;
        }

        # Skip to next Protection Domain
        next unless (defined $current_id);

        push @{$lines_of{$current_id}}, $entry;
        my ($key,$value)=($entry=~/ +(.*?) +: +(.*)/);
        $replications{$current_id}->{$key}=$value if (defined $key);
    }

    # Loop on replication hash table
    foreach my $id (sort keys %replications)
    {
        # Details
        push @details, "", @{$lines_of{$id}};

        # Status
        my $status=$replications{$id}->{'Status'};
        push @disabled_replications, $id if (defined $status and $status=~/DISABLED/i);
    }

    my $disabled_count=scalar @disabled_replications;
    if ($disabled_count)
    {
        print "$ERRORS[$criticity], found ".$disabled_count." disabled replication(s). See details...";
        $code=$criticity;
    }
    else
    {
        print $ERRORS[0] . ", all replications are enabled, see details";
        $code=$STATE_OK;
    }
    $perfs="disabled_count=".$disabled_count;
}

# End the status line, print the detail lines, then the "|"-prefixed
# performance data when the test produced some.
print "\n", join("\n",@details), "\n";
if ($perfs)
{
    print "|", $perfs, "\n";
}

#-----------------------------------------------------
# Cleanup
#-----------------------------------------------------
# Remove the log file
unlink $input_log;

# Remove the .libnet-openssh-perl file if any
unlink $ssh->{'_ctl_path'};

# Leave with the status computed by the test
exit $code;
