#!/usr/bin/env python
# Nagios plugin: scan one or more web pages for blacklisted words.
# Developed by: Yancy Ribbens (yribbens@nagios.com)
# Version 0.1

from urllib2 import urlopen
from optparse import OptionParser, OptionGroup
import sys

usage = "usage: %prog -u URL (-w WORDS | -l FILE)"
parser = OptionParser(usage=usage)

parser.add_option("-u", "--url", dest="url", type="string", help="Target website or comma-separated list of sites (http:// is prepended automatically)")

required = OptionGroup(parser, "Words to search for (supply one of the following)")

required.add_option("-w", "--word", dest="word", type="string", help="Target word or comma-separated list of words")
required.add_option("-l", "--dictionary", dest="dictionary", type="string", help="A file containing a comma-separated list of words")

parser.add_option_group(required)

(options, args) = parser.parse_args()
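
# Example invocations (illustrative only; the script name, hosts, words and
# file name below are placeholders, not values shipped with the plugin):
#   python check_blacklist.py -u example.com -w badword,casino
#   python check_blacklist.py -u example.com,example.org -l words.txt
# words.txt would contain a single comma-separated list, e.g.:
#   badword,casino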

def Nagios_Return(short_message="", long_message="", return_code=3):
    """Print a Nagios status line (plus optional long output) and exit
    with return_code; this function never returns."""
    if long_message != "":
        output = short_message + " | " + "\n" + long_message
    else:
        output = short_message

    # Map the standard Nagios return codes to their conventional prefixes;
    # any other code is treated as a plugin error.
    prefixes = {0: "OK: ", 1: "Warning: ", 2: "Critical: "}
    print prefixes.get(return_code, "Plugin Error: ") + output
    sys.exit(return_code)
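
# Example (illustrative values): the call
#   Nagios_Return("Found blacklisted words", "http://example.com - badword", 2)
# prints
#   Critical: Found blacklisted words |
#   http://example.com - badword
# and exits with status 2, which Nagios interprets as CRITICAL.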
        

def grep_this(the_page, word):
    """Return word if it occurs anywhere in the_page, otherwise an empty string."""
    if word in the_page:
        return word
    return ""

def scan_url(the_page):
    """Fetch the_page and return a list of [url, word] pairs, one per match."""
    black_list = []
    page = urlopen(the_page).read()
    if options.word is not None:
        list_of_words = options.word.split(',')
    elif options.dictionary is not None:
        # Read the dictionary file; words are stripped below so a trailing
        # newline in the file does not break the last word.
        with open(options.dictionary, 'r') as file_of_words:
            list_of_words = file_of_words.read().split(',')
    else:
        return black_list
    for word in list_of_words:
        found = grep_this(page, word.strip())
        if found != "":
            # Record the page and the word together so the caller can
            # join them into one line of long output.
            black_list.append([the_page, found])
    return black_list
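
# Example (illustrative): if options.word is "badword,casino" and the page
# at http://example.com contains "badword", then
#   scan_url("http://example.com")
# returns [['http://example.com', 'badword']].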
                    
if __name__ == '__main__':
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    black_string = ""
    error_string = ""
    if options.url is not None:
        list_of_pages = options.url.split(',')
        for page in list_of_pages:
            url = "http://" + page
            try:
                black_list = scan_url(url)
            except IOError, e:
                # Remember the failure so the plugin does not report OK
                # for pages it could not fetch.
                error_string += url + " - " + str(e) + "\n"
            else:
                for i in black_list:
                    black_string += ' - '.join(i) + "\n"
        if black_string != "":
            Nagios_Return("Found blacklisted words", black_string, 2)
        elif error_string != "":
            Nagios_Return("Unable to fetch all pages", error_string, 3)
        else:
            long_output = ""
            for i in list_of_pages:
                long_output += "OK: " + i + "\n"
            Nagios_Return("No blacklisted words found", long_output, 0)
            
            
