#!/usr/bin/env python
"""nagios-stat client - remote UNIX system monitoring tool for Nagios.

Connects to a nagios-stat daemon over TCP, sends the name of one check,
parses the textual reply and exits with a Nagios plugin status code:
0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN).

The code is written to run unchanged on Python 2.6+ and Python 3: every
``print`` call takes exactly one argument, so it behaves the same as a
statement (Python 2) and as a function (Python 3).
"""

import getopt
import re
import socket
import sys


class Functions:
    """Fetch data for one check from the daemon, report it and exit.

    ``options`` is the dictionary built by ``Initialization.getoptions``.
    It may be ``None`` when the instance is only used to read
    ``nagios_stat_version``.  Each check method expects ``self.data`` to hold
    the daemon's reply as text and always terminates through ``sys.exit``.
    """

    # The only names accepted as a check; each one has a method of that name.
    CHECKS = ("disk", "load", "proc", "swap", "user", "version")

    # Patterns are compiled once for the class instead of once per call.
    _DISK_RE = re.compile(r"^([\w\/\:\.\-\=\@\$]*)\s*\d*\s*\d*\s*\d*\s*(\d*)\%\s*([\w\/\-]*)")
    # AIX lists /proc unusually ("/proc - - - - /proc"); such lines are skipped.
    _PROC_RE = re.compile(r"^\/proc\s*-\s*-\s*-\s*-\s*/proc")
    # Captures the second load figure (reported as the five-minute load).
    _LOAD5_RE = re.compile(r"up\s*.*,\s*\d*\s*user[s]?,\s*load average[s]?:\s*[0-9\.]*[,]?[\s]*([0-9\.]*)[,]*")
    # Captures all three load figures for the performance data.
    _LOAD_ALL_RE = re.compile(r"up\s*.*,\s*\d*\s*user[s]?,\s*load average[s]?:\s*([0-9\.]*)[,]?[\s]*([0-9\.]*)[,]?[\s]*([0-9\.]*)")
    _USER_RE = re.compile(r"[users]*[=|:]?\s?(\d+)", re.M)
    _VERSION_RE = re.compile(r"(\w+\s)([\d,.]+)")

    def __init__(self, options):
        self.nagios_stat_version = 3.12
        self.options = options

    def run(self):
        """Validate the requested check, fetch its data and dispatch to it.

        Only names listed in ``CHECKS`` are dispatched.  Anything else
        (including "run" itself, or attribute names such as "options") is
        reported as an unknown check with exit status 3 instead of being
        looked up with ``getattr`` and called blindly.
        """
        command = self.options['command']
        if command not in self.CHECKS:
            print("Unknown check: " + command + "\n")
            sys.exit(3)
        self.__getdata()
        getattr(self, command)()

    # -- shared helpers ---------------------------------------------------

    def _report(self, message, perfdata=None):
        "Print the status line, followed by ' ' + perfdata when one is given."
        if perfdata:
            message = message + " " + perfdata
        print(message)

    def _exit_at_or_above(self, value):
        "Exit 2 / 1 / 0 depending on value >= critical / warning / neither."
        if value >= self.options['critical']:
            sys.exit(2)
        elif value >= self.options['warning']:
            sys.exit(1)
        sys.exit(0)

    def _select(self, disks, field, wanted, what):
        """Return a one-element list holding the entry whose ``field`` equals ``wanted``.

        When several entries match, the last one is kept.  When none match,
        the problem is reported and the program exits with status 2.
        """
        matches = [entry for entry in disks if entry[field] == wanted]
        if not matches:
            print("Disk utilization: Specified " + what + " not found.")
            sys.exit(2)
        return matches[-1:]

    # -- checks -----------------------------------------------------------

    def disk(self):
        """Report disk utilization per mount point.

        Default thresholds: warning 90, critical 95 (percent used).  The first
        line of the reply is treated as a header and skipped.  A reply with no
        parsable disk line, or with an empty percentage field, is reported as
        unusual data (exit 3) rather than as "all disks below thresholds".
        """
        self.__setthreshold("warning", 90)
        self.__setthreshold("critical", 95)

        # Each entry is (device, mount, percent used, "mount(NN%)" label).
        disks = []
        for line in self.data.splitlines()[1:]:
            if self._PROC_RE.match(line) is not None:
                continue
            match = self._DISK_RE.search(line)
            if match is None:
                self.__unusual_data()
            device, percent, mount = match.groups()
            try:
                used = int(percent)
            except ValueError:
                self.__unusual_data()
            disks.append((device, mount, used, mount + "(" + percent + "%)"))
        if not disks:
            self.__unusual_data()

        # Slim it down to a single disk if -d/-m was given.
        if "disk" in self.options:
            disks = self._select(disks, 0, self.options['disk'], "disk")
        if "mount" in self.options:
            disks = self._select(disks, 1, self.options['mount'], "mount point")

        # Clean out ignored disks/mounts.
        ignored_disks = self.options.get("ignoredisk", ())
        ignored_mounts = self.options.get("ignoremount", ())
        disks = [entry for entry in disks
                 if entry[0] not in ignored_disks and entry[1] not in ignored_mounts]

        # Sort the labels into critical/warning/ok buckets.
        critical_disks = []
        warning_disks = []
        ok_disks = []
        for device, mount, used, label in disks:
            if used >= self.options['critical']:
                critical_disks.append(label)
            elif used >= self.options['warning']:
                warning_disks.append(label)
            else:
                ok_disks.append(label)
        everything = critical_disks + warning_disks + ok_disks

        if 'verbose' in self.options:
            message = "Disk utilization: " + ",".join(everything)
        elif not critical_disks and not warning_disks:
            message = "Disk utilization: All disks below thresholds."
        else:
            message = "Disk utilization: " + ",".join(critical_disks + warning_disks)

        perfdata = None
        if 'perfdata' in self.options:
            # Turn "mount(NN%)" labels into "mount=NN%" pairs.
            perfdata = "| " + ", ".join(everything)
            perfdata = perfdata.replace("(", "=").replace(")", "")
        self._report(message, perfdata)

        if critical_disks:
            sys.exit(2)
        elif warning_disks:
            sys.exit(1)
        sys.exit(0)

    def load(self):
        """Report the load average taken from the daemon's ``uptime`` output.

        Default thresholds: warning 2, critical 5.  The second of the three
        load figures is the one compared against the thresholds.  A reply
        whose load figure is missing or not a number is unusual data (exit 3).
        """
        self.__setthreshold("warning", 2)
        self.__setthreshold("critical", 5)

        match = self._LOAD5_RE.search(self.data)
        if match is None:
            self.__unusual_data()
        try:
            fiveminuteload = float(match.group(1))
        except ValueError:
            # The capture group may match an empty string.
            self.__unusual_data()

        perfdata = None
        if 'perfdata' in self.options:
            # Performance data is best effort: if the three figures cannot be
            # parsed, the status line is printed without it.
            try:
                loads = [float(text) for text in self._LOAD_ALL_RE.search(self.data).groups()]
                perfdata = "| " + "load01min=%.2f, load05min=%.2f, load15min=%.2f" % tuple(loads)
            except (AttributeError, ValueError):
                perfdata = None
        self._report("Load average: %.2f" % fiveminuteload, perfdata)
        self._exit_at_or_above(fiveminuteload)

    def proc(self):
        """Report the number of processes, optionally filtered by name/state.

        Default thresholds: warning 100 / critical 200, or warning -1 /
        critical 1 with the 'lt' option, which alerts when the count is
        *below* the thresholds.  Rows too short to carry the inspected
        column are skipped.
        """
        reverse = 'lt' in self.options
        if reverse:
            self.__setthreshold("warning", -1)
            self.__setthreshold("critical", 1)
        else:
            self.__setthreshold("warning", 100)
            self.__setthreshold("critical", 200)

        # Minus one for header, finalizing endline, and the ps itself (quick)
        numprocesses = self.data.count("\n") - 3

        if 'state' in self.options or 'processname' in self.options:
            # Split every row into at most six columns, then drop the header
            # row and the trailing piece after the final newline.
            proctable = [line.split(None, 5) for line in self.data.split("\n")][1:-1]

            if 'processname' in self.options:
                name = self.options['processname']
                # Sometimes processes have no name (Solaris zombies).
                proctable = [row for row in proctable
                             if len(row) > 4 and row[4].startswith(name)]
                numprocesses = len(proctable)

            if 'state' in self.options:
                numprocesses = 0
                for row in proctable:
                    if len(row) <= 2:
                        continue
                    for flag in self.options['state']:
                        if flag in row[2]:
                            numprocesses = numprocesses + 1
                            break

        perfdata = None
        if 'perfdata' in self.options:
            perfdata = "| procs=%d" % numprocesses
        self._report("Number of processes: %d" % numprocesses, perfdata)

        if reverse:
            if numprocesses < self.options['critical']:
                sys.exit(2)
            elif numprocesses < self.options['warning']:
                sys.exit(1)
            sys.exit(0)
        self._exit_at_or_above(numprocesses)

    def swap(self):
        """Report swap utilization as a percentage.

        Default thresholds: warning 75, critical 90.  A reply that is not a
        number is reported as unusual data (exit 3).
        """
        self.__setthreshold("warning", 75)
        self.__setthreshold("critical", 90)

        text = self.data.strip()
        # On FreeBSD 5.0, the data returned ends with "%"
        text = text.replace("%", '')
        # Some locations use a , instead of a . for the decimal point
        text = text.replace(',', '.')
        try:
            used = float(text)
        except ValueError:
            self.__unusual_data()

        perfdata = None
        if 'perfdata' in self.options:
            perfdata = "| swap=%.2f%%" % used
        self._report("Swap utilization: %.2f%%" % used, perfdata)
        self._exit_at_or_above(used)

    def user(self):
        """Report the number of connected users.

        Default thresholds: warning 20, critical 30.
        """
        self.__setthreshold("warning", 20)
        self.__setthreshold("critical", 30)

        match = self._USER_RE.search(self.data)
        if match is None:
            self.__unusual_data()
        users = int(match.group(1))

        perfdata = None
        if 'perfdata' in self.options:
            perfdata = "| users=%d" % users
        self._report("Users connected: %d" % users, perfdata)
        self._exit_at_or_above(users)

    def version(self):
        """Report the version of the daemon.

        Alerts when the daemon's version is *at or below* a threshold:
        warning defaults to this client's version minus 0.01, critical to 0.
        A comma in the version number is read as a decimal point, as in
        ``swap``; a value that is still not a number is unusual data (exit 3).
        """
        self.__setthreshold("warning", (self.nagios_stat_version - .01))
        self.__setthreshold("critical", 0)

        match = self._VERSION_RE.search(self.data)
        if match is None:
            self.__unusual_data()
        program, version = match.groups()
        try:
            version = float(version.replace(',', '.'))
        except ValueError:
            self.__unusual_data()

        perfdata = None
        if 'perfdata' in self.options:
            perfdata = "| version=%1.2f" % version
        self._report("Running: %s%1.2f" % (program, version), perfdata)

        if version <= self.options['critical']:
            sys.exit(2)
        elif version <= self.options['warning']:
            sys.exit(1)
        sys.exit(0)

    # -- plumbing ---------------------------------------------------------

    def __getdata(self):
        """Send the check's command to the daemon and store the reply in self.data.

        The socket is always closed.  Any socket error is reported and ends
        the program with status 3.  With the 'debug' option the raw reply is
        printed and the program exits with status 1; a reply starting with
        "ERROR" is printed (without that prefix) and also exits with status 1.
        """
        # Some commands sent to server aren't the same name as the nagios-stat command
        if self.options['command'] == "load":
            command = "uptime\n"
        elif self.options['command'] == "alldisk":
            # NOTE(review): no "alldisk" check exists in this class, so this
            # mapping looks unreachable; kept as it was.
            command = "disk\n"
        else:
            command = self.options['command'] + "\n"

        socketfh = None
        chunks = []
        try:
            socketfh = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            socketfh.connect((self.options['server'], self.options['port']))
            # sendall() keeps writing until the whole command is sent.
            socketfh.sendall(command.encode("ascii"))
            while True:
                # Keep pulling down buffer 32KB at a time
                chunk = socketfh.recv(32768)
                if not chunk:
                    break
                chunks.append(chunk)
        except socket.error as exc:
            # Not every socket error carries (errno, strerror).
            strerror = getattr(exc, "strerror", None) or str(exc)
            print("Socket error: " + strerror)
            sys.exit(3)
        finally:
            if socketfh is not None:
                socketfh.close()

        data = b"".join(chunks)
        if not isinstance(data, str):
            # Python 3 receives bytes; the checks work on text.
            data = data.decode("utf-8", "replace")
        self.data = data

        if 'debug' in self.options:
            print(self.data)
            sys.exit(1)
        if self.data[0:5] == "ERROR":
            print(self.data[6:])
            sys.exit(1)

    def __setthreshold(self, key, value):
        "Set a default threshold unless the user already supplied one."
        if key not in self.options:
            self.options[key] = value

    def __unusual_data(self):
        "Report an unparsable reply and exit with status 3."
        print("Server returned unusual data.")
        sys.exit(3)


class Initialization:
    "Methods for interacting with user - initial code entry point."

    def __init__(self):
        self.options = {}
        self.options['port'] = 1040

    def _number(self, option, value, convert):
        "Convert a numeric option value; on failure explain and show usage."
        try:
            return convert(value)
        except ValueError:
            print(sys.argv[0] + ": invalid value for " + option + ": " + value)
            self.usage()

    def getoptions(self):
        """Parse sys.argv into self.options.

        Exits through ``usage`` (status 3) on unknown switches, non-numeric
        numeric options, a wrong number of positional arguments, mutually
        exclusive disk switches, or an unknown check name.
        """
        try:
            opts, args = getopt.getopt(
                sys.argv[1:],
                "c:d:D:lm:M:n:p:Ps:vVw:xh",
                ["critical=", "debug", "disk=", "ignoredisk=", "lt", "mount=",
                 "ignoremount=", "processname=", "port=", "perfdata", "state=",
                 "verbose", "version", "warning=", "help"])
        except getopt.GetoptError as exc:
            print(sys.argv[0] + ": " + exc.msg)
            print("Try '" + sys.argv[0] + " --help' for more information.")
            self.usage()

        # Handle command switches
        for option, value in opts:
            if option in ("-c", "--critical"):
                self.options['critical'] = self._number(option, value, float)
            elif option in ("-d", "--disk"):
                self.options['disk'] = value
            elif option in ("-D", "--ignoredisk"):
                self.options['ignoredisk'] = value.split(",")
            elif option in ("-l", "--lt"):
                self.options['lt'] = 1
            elif option in ("-m", "--mount"):
                self.options['mount'] = value
            elif option in ("-M", "--ignoremount"):
                self.options['ignoremount'] = value.split(",")
            elif option in ("-n", "--processname"):
                self.options['processname'] = value
            elif option in ("-p", "--port"):
                self.options['port'] = self._number(option, value, int)
            elif option in ("-P", "--perfdata"):
                self.options['perfdata'] = 1
            elif option in ("-s", "--state"):
                self.options['state'] = sorted(value)
            elif option in ("-v", "--verbose"):
                self.options['verbose'] = 1
            elif option in ("-V", "--version"):
                self.version()
                sys.exit(0)
            elif option in ("-w", "--warning"):
                self.options['warning'] = self._number(option, value, float)
            elif option in ("-x", "--debug"):
                self.options['debug'] = 1
            elif option in ("-h", "--help"):
                self.usage()

        # Check to see if server and command are set
        if len(args) != 2:
            self.usage()

        # Can't have 'mount','disk', and 'ignoredisk'/'ignoremount' set at the same time
        exclusive = [
            'disk' in self.options,
            'mount' in self.options,
            'ignoredisk' in self.options or 'ignoremount' in self.options,
        ]
        if exclusive.count(True) > 1:
            print("-d/-m/(-D/-M) are exclusive options.")
            self.usage()

        # Pull off the two remaining arguments
        self.options['command'], self.options['server'] = args[0:2]

        # Only real checks are accepted; an attribute lookup would also let
        # through names such as "options" or "__init__".
        if self.options['command'] not in Functions.CHECKS:
            print("Unknown check: " + self.options['command'] + "\n")
            self.usage()

    def main(self):
        "Parse the command line and run the requested check."
        # Check to see if running Python 2.x+
        if sys.version_info[0] < 2:
            print("nagios-stat requires Python version 2.0 or greater.")
            sys.exit(3)
        self.getoptions()
        i = Functions(self.options)
        i.run()

    def usage(self):
        "Print the help text and exit with status 3."
        print("Usage: " + sys.argv[0] + " [OPTION] check server")
        print("nagios-stat client - remote UNIX system monitoring tool for Nagios.")
        print("Available checks are: disk, load, proc, swap, user and version.\n")
        print("-c, --critical=LEVEL Level to issue critical at.")
        print("-d, --disk=DISK Disk to check in disk check.")
        print("-D, --ignoredisk=DISK Comma delimited list of disks to ignore.")
        print("-l, --lt Used for process check, reverse warn/critical.")
        print("-m, --mount=MOUNT Mount point to check in disk check.")
        print("-M, --ignoremount=MOUNT Comma delimited list of mount points to ignore.")
        print("-n, --processname=NAME Name of process to search for.")
        print("-p, --port=PORT Port to connect to.")
        print("-P, --perfdata Print out performance data.")
        print("-s, --state=STATE Processes states to check.")
        print("-v, --verbose Verbose output.")
        print("-w, --warning=LEVEL Level to issue warning at.")
        print("-V, --version Output version information and exit.")
        print(" -x, --debug Output debug information without processing.")
        print(" -h, --help Print this help and exit.")
        sys.exit(3)

    def version(self):
        "Print version and licensing information."
        i = Functions(None)
        print("nagios-stat %.2f" % i.nagios_stat_version)
        print("Written by April King (april@twoevils.org).\n")
        print("This is free software. There is NO warranty; not even for MERCHANTABILITY or")
        print("FITNESS FOR A PARTICULAR PURPOSE.")
        print("\nNagios is a trademark of Ethan Galstad.")


if __name__ == "__main__":
    i = Initialization()
    i.main()