#!/usr/local/bin/perl
#
# Checkservice by Paul van Tilburg <paul@linvision.com>
#
# Syntax: 
# checkservice [-t targethost] [-c cfgdir] [-l logdir|-] [-m] [-v [short]] [-s]
#
# Checks ports (services) on servers for their status according to configfiles
# in <cfgdir>/hosts/. Global config file is <cfgdir>/checkservice.conf
# (the default for <cfgdir> is /etc/checkservice).
#
# Writes a report for each host configfile (the filename is the host that
# should be scanned) to a per-host logfile, either for every host in
# <cfgdir>/hosts/ or for just one host if <targethost> is given. Logfiles can
# be found in <logdir>. If the logdir option is omitted or its value is '-',
# Checkservice prints to STDOUT. Special per-host output can be generated
# with the -m option: Checkservice then prints a 0 (host <targethost> and all
# its services are up) or a 1.

# <<INITIALIZATION>>

use POSIX;
use strict;
use CS::Config;
use CS::Functions;
use IO::Socket;
use File::Find;
use Mail::Send;
use Getopt::Long;

my $program = "Checkservice";
my $version = "1.2.0";

# Short program name used as the prefix of every diagnostic. It is the first
# run of word characters after the last '/' in $0. When $0 has no directory
# part (e.g. "perl checkservice") the old pattern did not match at all and
# left $p undefined, so the directory part is optional here and the
# lowercased program name is used as a last resort.
my ($p) = $0 =~ m{(?:.*/)?(\w+)};
$p = lc $program unless defined $p;

# Reference to the hash holding all service data of the host that is
# currently being processed (set in main(), read by mail_warning()).
my $hostc;

# Usage text shown for --help and on option parsing errors.
my $help_info = <<EOT;
$program $version checks certain services on certain hosts. It's equipped with
various warning methods and several ways for reporting and logging.

Usage: checkservice [OPTIONS]...

 -t, --targethost=STR	Specify host to scan, a configfile for that host must
                        exist in <configdir>/hosts/. If STR is '*' all
			hosts will be scanned for which configfiles are
			available. If targethost is grouped, the grouppath
			must be provided too.
 -c, --configdir=PATH   Specify configdir, (default: /etc/checkservice).
 -l, --logdir=PATH	Logdirectory where log should be kept. If PATH is
			'-', $program will print to stdout.
 -m, --mrtg		Generate only a 0 (host is up & all services are
  			ok) or 1 to stdout. Using this option, logdir
			specification will be ignored.
			NB. the targethost option is compulsory!
 -s, --status		Display status information about available
                        plugins and hosts-configuration.
 -h, --help		Show this help and exit.
 -v, --version=MODE	Show version (if MODE is 'short', only version
			number will be printed) and exit.

EOT

# >> Read command line parameters.
# The help text documents --configdir, so it is accepted as an alias of the
# historical --cfgdir spelling; the value is stored under 'cfgdir' either way.
my %params = (targethost => '*', cfgdir => '/etc/checkservice', logdir => '-');
GetOptions(\%params, 'cfgdir|configdir=s', 'targethost=s', 'logdir=s',
           'status!', 'help!', 'mrtg!', 'version:s')
  or die $help_info;

# If --version|-v is provided, show version and exit!
# With 'short' as value, only the version is shown (for the php-statuspage).
# The value is optional, so a bare -v yields the empty string: test for
# definedness, not truth, or a plain -v would be silently ignored.
if (defined $params{'version'}) {
  if ($params{'version'} eq "short") { print "$version\n"; }
  else {
    print "Linvision $program version $version by Paul van Tilburg"
          . "<paul\@linvision.com>.\n";
  }
  exit 0;
}

# Show help and exit! Asking for help is not an error, so print to stdout
# and exit successfully instead of die()ing.
if ($params{'help'}) {
  print $help_info;
  exit 0;
}

# >> Read and parse global configfile.
my %config = ();
(-d $params{cfgdir}) or
  die "$p: Can't find config dir ($params{cfgdir})\n";

my $global_conf = "$params{cfgdir}/checkservice.conf";
if (-r $global_conf) {
  open my $conf_fh, '<', $global_conf or
    die "$p: Can't open global configfile "
        . "'$global_conf'\n";

  while (my $line = <$conf_fh>) {
    chomp $line;
    $line =~ s/#.*//;		# strip comments
    # "key = value"; trailing blanks (e.g. those left in front of a stripped
    # comment) are not part of the value, otherwise 'yes   # on' ne 'yes'.
    if (my ($key, $value) = $line =~ /^\s*(\S+)\s*=\s*(\S.*?)\s*$/) {
      $config{$key} = $value;
    }
  }

  close $conf_fh;
}

# Show status and exit, if enabled
if ($params{'status'}) { show_status(); }

# Do some path checking.
(-d "$config{lockpath}") or
  die "$p: Can't find or open directory for lockfiles: $config{lockpath}!\n";
(-d "$config{cachepath}") or
  die "$p: Can't find or open directory for caching: $config{cachepath}!\n";
(-d "$params{cfgdir}/hosts") or
  die "$p: Can't find dir for hostfiles ($params{cfgdir}/hosts)!\n";
# The logdir is only needed when really logging to files: '-' means stdout
# and in mrtg mode the logdir specification is ignored altogether.
if (!$params{'mrtg'} and $params{'logdir'} ne "-"
    and ! -d $params{'logdir'}) {
  die "$p: Can't find logdir ($params{logdir})!\n";
}

# >> Read and parse host-specific configfiles.

my @conffiles = ();
$CS::Config::root = $params{'cfgdir'};

# Determine if all (default) or just one host should be checked.
if ($params{'targethost'} eq "*") {
  # A NUL byte is put in front of the filename so that, once sorted, the
  # files of a directory come before the contents of its subdirectories;
  # main() removes the byte again before using the path.
  finddepth(sub { push @conffiles, "$File::Find::dir/\x00$_" if (-f && /^\w/) },
            "$params{cfgdir}/hosts");
}
else {
  (-r "$params{cfgdir}/hosts/$params{targethost}") or
    die "$p: Couldn't read/find configfile for host "
         . "'$params{targethost}': $!\n";
  push @conffiles, "$params{cfgdir}/hosts/$params{targethost}";
}

main();		# Start the main()-sub!

# << MAIN >>
# 
# Processes every collected host configfile and reports the results.
#
# Per host exactly one of these happens first:
#   * mrtg mode:    print "0" (host and all services ok) or "1" to stdout;
#   * logging mode: append one line per service to <logdir>/<host> and fire
#                   the configured warnings (beep, sms, mail, ...).
# After that, unless in mrtg mode, a status block is written to stdout (no
# logdir) or to <cachepath>/web.cache (logging with webcache enabled).
#
# NB. host_up() returns the exit status of ping, so a TRUE value means the
# host is NOT reachable.
#
# Never returns: trims the failure cache and exits with status 0.
sub main {
  # With webcache enabled the status blocks are diverted from stdout to the
  # web cache, but only when logging to files.
  my $wcache = ($config{'webcache'} eq "yes" && $params{'logdir'} ne "-");

  # The cache is rewritten on every run, stdout is appended to.
  my ($mode, $target) = $wcache ? ('>',  "$config{cachepath}/web.cache")
                                : ('>>', "/dev/stdout");
  open my $out, $mode, $target or
    die "$p: Can't open $target for writing: $!\n";

  # Start processing every host-specific configfile! The NUL bytes were only
  # inserted to influence the sort order, see where @conffiles is filled.
  foreach my $cfile (map { tr/\x00//d; $_ } sort @conffiles) {
    chomp $cfile;

    # What follows <cfgdir>/hosts/ is "[group/[subgroup/...]]host": the host
    # is the LAST component, everything before it is the group path.
    my @path = split('/', substr $cfile, (length "$params{cfgdir}/hosts") + 1);
    my $host = pop @path;

    # Read config file for $host.
    if (my $msg = checkhostc(@path, $host)) {
      print STDERR "$p: parse error in configfile of host '$host'" .
                   (@path ? " (group: " . join('/', @path) . ")" : '') .
                   ": '$msg', skipping...\n";
      next;
    }
    $hostc = gethostc(@path, $host);
    my $services = $hostc->{'services'};

    # Print to stdout in simple format (shell-style return values), ready
    # for mrtg (host must be specified in commandline parameters):
    # 0    <-- hosts is up, all services up
    # 1    <-- host down or one or more services are down
    if ($params{'mrtg'}) {
      $params{'targethost'} eq "*" and
        die "$p: Specific host must be provided, use '$p -t <host> -m'\n";
      my $check = 0;

      if ($config{'downcritical'} eq 'yes' && host_up($host)) { $check = 1; }
      else {
        foreach my $name (sort keys %{$services}) {
          my %data = %{$services->{$name}};
          $check = 1 if checkservice($host, $data{'ports'},
                                     $data{'shortname'}, $data{'checktype'});
        }
      }
      # scalar(): in list context localtime would be printed as a run of
      # concatenated numbers instead of a timestamp.
      print $check, " (", scalar(localtime), ")\n";
    }

    # Logging to logfile is selected, this enables warnings!
    # There are three warning systems: beep, sms and mail.
    elsif ($params{'logdir'} ne '-') {
      my $localhost = `hostname`;
      chomp $localhost;
      open my $log, '>>', "$params{logdir}/$host" or
        die "$p: Can\'t open $params{logdir}/$host: $!\n";

      if ($config{'downcritical'} eq "yes" and host_up($host)) {
        print {$log} strftime("%b %e %H:%M:%S", localtime);
        print {$log} " $localhost checkservice: error:"
                     . " host $host is unreachable.\n";
        warning("$params{cfgdir}/unreachable.mail", 2, $host, "-", "-");
      }
      else {
        # The host answers again: drop the host-wide lockfile.
        (-f "$config{lockpath}/$host.-") &&
          unlink "$config{lockpath}/$host.-";

        foreach my $name (sort keys %{$services}) {
          my %data = %{$services->{$name}};
          my $chk = checkservice($host, $data{'ports'},
                                 $data{'shortname'}, $data{'checktype'});
          my $portset = join(',', @{$data{'ports'}});

          print {$log} strftime("%b %e %H:%M:%S", localtime),
                       " $localhost checkservice: service ",
                       $data{'shortname'}, ": ";

          if ($chk == 1) {
            print {$log} "failed.\n";
            warning("$params{cfgdir}/failed.mail", $chk, $host,
                    $data{'shortname'}, $portset, $data{'action'});
          }
          elsif ($chk == 2) {
            print {$log} "timeout.\n";
            warning("$params{cfgdir}/timeout.mail", $chk, $host,
                    $data{'shortname'}, $portset, $data{'action'});
          }
          elsif ($chk == 3) {
            print {$log} "plugin error.\n";
          }
          elsif ($chk == 0) {
            print {$log} "ok.\n";
            # A lockfile means the service was down before: report that it
            # is back and forget about the failure.
            if (-f "$config{lockpath}/$host.$data{shortname}") {
              warning("$params{cfgdir}/successful.mail", $chk, $host,
                      $data{'shortname'}, $portset, $data{'action'});
              unlink "$config{lockpath}/$host.$data{shortname}";
            }
          }
          else {
            print {$log} "unknown ($chk).\n";
          }
        }
      }

      close $log;
    }

    # Print to stdout! Delivers for every host a block, blockdefinition:
    # host:<host>
    # ...
    # error:<message>  ||
    # service:<portset>:<short servicename>:<checklevel>:<longservicename>
    # ...
    # endhost:
    if (($params{'logdir'} eq "-" or $wcache) && ! $params{'mrtg'}) {
      # Group path and host are joined with '/' so a grouped host reads
      # "group/host"; for ungrouped hosts this is just the host name.
      print {$out} "host:", join('/', @path, $host), "\n";
      if ($config{'downcritical'} eq "yes" && host_up($host)) {
        print {$out} "error:Host $host is unreachable!\n";
      }
      else {
        my @resultn = ("ok", "failed", "timeout", "plugin error");

        foreach my $name (sort keys %{$services}) {
          my %data = %{$services->{$name}};
          my $chk = checkservice($host, $data{'ports'},
                                 $data{'shortname'}, $data{'checktype'});
          my $perc = percentage($host, $data{'shortname'});

          print {$out} "service:", join(",", @{$data{'ports'}}),
                ":$data{shortname}:$data{checktype}:$data{longname}:";
          print {$out} $chk <= $#resultn ? $resultn[$chk] : "unknown ($chk)",
                ":";
          if ($perc ne "n/a") { printf {$out} "%3.6f\n", $perc; }
          else { print {$out} "n/a\n"; }
        }
      }
      print {$out} "endhost:\n";
    }
  }

  close $out;

  tail_failcache();
  exit 0;
}

# <<HELPER SUBROUTINES>>

# Comparator for sort(): orders the sort variables $a and $b numerically,
# smallest first. Returns -1, 0 or 1.
sub comp_num {
  return $a <=> $b;
}

# Would display Checkservice's own status (plugins, hosts-configuration).
# For now it only tells the user that this is not available and ends the
# program successfully; it never returns.
sub show_status {
  my $notice = "Checkservice: this feature is not yet implemented!\n";

  print $notice;
  exit 0;
}

# Check if a host is up by pinging it.
#
# Argument: the name or address of the host.
# The number of echo requests is taken from the hostping option; when that
# option is not set it defaults to 2 (the default is stored in %config).
#
# Returns the status system() reports for the ping command, so beware:
# 0 (false) means the host answered, any TRUE value means the host is
# unreachable or ping could not be run. All callers rely on this.
sub host_up {
  my ($host) = @_;

  # Name of the operating system, as `uname -s` would print it, but
  # without starting a process for it on every single check.
  my ($system) = POSIX::uname();

  unless (defined $config{'hostping'}) { $config{'hostping'} = 2 }
  my $count = $config{'hostping'};

  # The command goes through the shell (for the redirections), so quote the
  # host: it stems from a filename or from the command line.
  (my $target = defined $host ? $host : '') =~ s/'/'\\''/g;
  $target = "'$target'";

  # Attempt to increase portability! If I missed one, please let me know.
  my $command;
  if ($system =~ /SunOS/) {
    $command = "ping -s $target 56 $count";
  }
  elsif ($system =~ /HP-UX/) {
    $command = "ping $target -n $count";
  }
  else {
    $command = "ping -c $count $target";
  }

  return system("$command > /dev/null 2>&1");
}

# Trims the failure-cache (<cachepath>/failures.cache) to its last lines.
# The number of lines kept is defined in the global configfile (the
# failureslen-option, default 5; the default is stored in %config).
#
# Does nothing when the logdir is not a directory (e.g. '-' for stdout).
# A cache that can't be read is treated as empty; dies when the cache can't
# be (re)written.
sub tail_failcache {
  return unless -d $params{'logdir'};

  my $cache = "$config{cachepath}/failures.cache";
  $config{'failureslen'} = 5 unless (defined $config{'failureslen'});
  my $keep = $config{'failureslen'};

  # Read the whole cache and close it again before it is rewritten.
  my @lines;
  if (open my $in, '<', $cache) {
    @lines = <$in>;
    close $in;
  }

  # Index of the first line to keep; everything when there are not more
  # than $keep lines.
  my $start = ($keep > $#lines) ? 0 : ($#lines - $keep + 1);
  my @newcache = @lines[$start .. $#lines];

  open my $fails, '>', $cache or
    die "$p: Couldn't open $config{cachepath}/failures.cache"
        . " for writing: $!\n";
  print {$fails} @newcache;
  # Errors of buffered writes only show up when closing.
  close $fails or
    die "$p: Couldn't write $config{cachepath}/failures.cache: $!\n";
}

# Returns the numerically smaller one of its two arguments (the second
# one when both are equal).
sub min {
  my ($first, $second) = @_;

  return $first if $first < $second;
  return $second;
}

# Generate mail warning. This is the builtin mailwarning plugin.
# If mail.plugin warningplugin is installed it will override this builtin
# (see warning()).
#
# Arguments: template file, host, service, portset, action command,
# reference to the lines the action printed (may be undef) and the check
# result (false means the service is ok again).
#
# Uses a lockfile (<lockpath>/<host>.<service>) to prevent a mail on every
# check: its first line holds the time the failure was first seen, its
# modification time the moment of the last mail. A mail is sent when the
# lockfile was just created, when the result is false, or when the last mail
# is longer ago than the repeatmailw-interval.
#
# Who is mailed is the combination of the mailto-var of the global
# configfile (colon separated) and the one of the host-specific configfile.
#
# A template is used for the mail that will be sent, substitutions in
# template:
# %t -> time server/service went down.
# %h -> host that went down/host from which a service went down.
# %s -> service that went down (none if whole server went down).
# %p -> portset defined for service.
# A "Subject: ..." line in the template sets the subject of the mail.
#
# If action warning was enabled, the output of the action will be appended
# to the mail.
#
# Dies when the lockfile can't be created or read.
sub mail_warning {
  my ($template, $host, $service, $ports, $action, $aoutput, $result) = @_;
  my @wmethods = split(":", $config{'wmethod'});
  my $lockf = "$config{lockpath}/$host.$service";
  my $crflag = 0;		# Set when the lockfile was created just now.
  my ($ptime, $mtime);

  if (! -r $lockf) {
    # 'or', not '||': the latter binds to the filename and would never die.
    open my $lock, '>', $lockf
      or die "$p: Could not create lockfile $lockf: $!\n";
    my $stamp = $result ? time() : 0;
    print {$lock} "$stamp\n";
    close $lock;

    $crflag = 1;
    $ptime = time();
  }
  else {
    open my $lock, '<', $lockf
      or die "$p: Could not read lockfile $lockf: $!\n";
    $ptime = <$lock>;
    close $lock;
    chomp $ptime;
    $mtime = (stat $lockf)[9];
  }

  # Nothing to do when a mail went out recently enough.
  return
    unless (!$result or $crflag
            or $mtime < (time() - $config{'repeatmailw'}));

  my $subject = "Checkservice warning";
  my @contents;

  my $when = ctime($ptime);
  chomp $when;

  # Touch the lockfile: its mtime is the time of the last mail.
  my $now = time();
  utime $now, $now, $lockf;

  my %substs;
  @substs{'%t', '%h', '%s', '%p'} = ($when, $host, $service, $ports);

  my @mailto = split(':', $config{'mailto'});
  if ($hostc and $hostc->{'mailto'}) { push @mailto, @{$hostc->{'mailto'}}; }

  if (open my $temp, '<', $template) {
    while (my $line = <$temp>) {
      # One pass over the line, so text that was just filled in is never
      # mistaken for another placeholder.
      $line =~ s/(%[thsp])/$substs{$1}/g;
      if ($line =~ /^Subject:\s(.*)$/) { $subject = $1; }
      else { push @contents, $line; }
    }
    close $temp;

    # $aoutput is undef when no action was run (service is ok again).
    if (absgrep(@wmethods, "action") and $aoutput and @$aoutput) {
      push @contents, "\nPS. Output of action '$action':\n";
      push @contents, map { "] $_" } @$aoutput;
    }
  }
  else {
    push @contents, "Could not open mail template '$template': $!\n\n"
                  . ">> Service $service on $host:$ports failed "
                  . "on $when!\n";
  }

  # The recipients are set with to(): the constructor takes key/value pairs
  # only, so a list of two or more addresses must not be flattened into it.
  my $mail = Mail::Send->new(Subject => $subject);
  $mail->to(@mailto);
  my $body = $mail->open();
  print {$body} @contents;
  $body->close();
}

# Try to run a defined action, capture the output and return that.
# Kill the command if it takes longer than what is set in the
# atimeout-option (default 20 sec., the default is stored in %config).
#
# Argument: the command line of the action; it is run by the shell with
# stderr joined into stdout. Nothing is run for an undefined or false value.
#
# Returns a reference to an array with the lines the action printed. When
# the action did not end normally one closing line that tells why (timeout,
# other signal, exit status) is appended.
sub do_action {
  my ($command) = @_;
  my @output;

  return \@output unless $command;

  unless (defined($config{'atimeout'})) { $config{'atimeout'} = 20; }

  # Forking open: yields the pid of the child in the parent, 0 in the child
  # and undef when the fork failed. The latter two must not be mixed up, or
  # a failed fork would make this very process exec the action.
  my $pid = open(my $pipe, '-|');
  unless (defined $pid) {
    push @output, "--- Could not start action '$command': $! ---";
    return \@output;
  }

  if ($pid == 0) {
    # Child. A pending alarm survives the exec, so the action is killed by
    # SIGALRM when it runs too long.
    alarm $config{'atimeout'};
    exec "$command 2>&1"
      # Never fall through into the code of the parent.
      or POSIX::_exit(127);
  }

  while (my $line = <$pipe>) { push @output, $line; }

  unless (close $pipe) {
    my $signal = $? & 127;

    if ($signal == POSIX::SIGALRM()) {
      push @output,
           "--- Action timed out after $config{atimeout}s and died ---";
    }
    elsif ($signal) {
      push @output, "--- Action was killed by signal $signal ---";
    }
    elsif ($? >> 8) {
      push @output, "--- Action exited with status " . ($? >> 8) . " ---";
    }
    else {
      push @output, "--- Could not close pipe to action: $! ---";
    }
  }

  return \@output;
}

# Launch warning subroutines for each warning method enabled in global
# configfile (the wmethod-option, colon separated).
#
# Arguments: mail template, check result (false: service is ok again), host,
# service, portset and the action command of the service (may be undef).
#
# A recovery (false result) is only reported when the back_up-option is on.
# For a failure the failure is added to the failure cache, unless a lockfile
# shows it is known already, and the action of the service is run.
#
# Per method <pluginpath>/warning/<method>.plugin is run when it is
# executable; only 'mail' has a builtin fallback: mail_warning().
sub warning {
  my ($template, $result, $host, $service, $ports, $action) = @_;
  my @wmethods = split(":", $config{'wmethod'});
  # Output of the action; stays empty when no action is run, so that
  # mail_warning() always gets a valid array reference.
  my $output = [];

  return unless ($result or $config{'back_up'} =~ /yes|y/i);

  if ($result) {
    reportfail($host, $service, $ports)
      unless -f "$config{lockpath}/$host.$service";
    $output = do_action($action);
  }

  foreach my $method (@wmethods) {
    my $plugin = "$config{pluginpath}/warning/$method.plugin";

    if (-x $plugin) {
      # One list element per argument: no shell is involved here, so a
      # single "-c ... -h ..." string would reach the plugin as ONE argument.
      system($plugin,
             '-c', $params{cfgdir}, '-h', $host, '-p', $ports,
             '-s', $service, '-r', $result);
    }
    elsif ($method eq 'mail') {
      mail_warning($template,
                   $host, $service, $ports, $action, $output, $result);
    }
  }
}

# Calculates uptime percentage for a service by parsing a logfile.
# Calculation is done by taking all entries from the current month
# and dividing the 'ok'-entries by the total number of entries.
# Thus at the end of a month the uptime calculation is the most accurate.
#
# Arguments: the host (name of its logfile in <defaultlogdir>) and the
# short name of the service.
# Returns the percentage (0..100), or the string "n/a" when the logfile
# can't be read or holds no entries of this month for the service.
sub percentage {
  my ($host, $service) = @_;
  my ($succ, $cnt) = (0, 0);
  my $month = strftime('%b', localtime);

  open my $logf, '<', "$config{defaultlogdir}/$host"
    or return "n/a";

  # Month and service are data, not patterns: quote them, or a service
  # named e.g. "c++" would break the expression.
  my $entry =
    qr/\Q$month\E\s*\d+\s.*\s.*\scheckservice:\sservice\s\Q$service\E:\s(.*)\./;

  while (my $line = <$logf>) {
    chomp $line;
    if (my ($res) = $line =~ $entry) {
      $succ++ if (defined $res && $res eq "ok");
      $cnt++;		# Every entry counts, whatever its result.
    }
  }

  close $logf;

  return "n/a" unless $cnt;
  return (100. * $succ) / $cnt;
}

# Adds a failure to the failure-cache (<cachepath>/failures.cache).
#
# Arguments: host, service and portset. A service of "-" stands for the
# host as a whole, which is recorded as "unreachable"; a failing service is
# recorded as "unavailable".
# Line format: <host>:<service>:<ports>:<kind>:<time in epoch seconds>
#
# Dies when the cache can't be opened or written.
sub reportfail {
  my ($host, $service, $ports) = @_;
  my $kind = ($service eq "-") ? "unreachable" : "unavailable";

  open my $fails, '>>', "$config{cachepath}/failures.cache" or
    die "$p: Couldn't open $config{cachepath}/failures.cache for writing:"
        . " $!\n";
  print {$fails} "$host:$service:$ports:", $kind, ":", time(), "\n";
  # Errors of buffered writes only show up when closing.
  close $fails or
    die "$p: Couldn't write $config{cachepath}/failures.cache: $!\n";
}

# Does the actual checking! It tries to find a plugin for the service first.
#
# Arguments: host, reference to the list of ports of the service, short
# name of the service and the checklevel: "s" (simple, the default) or
# "x" (extended).
#
# Every port is checked; the timeout for one check is the ctimeout-option
# (default 10 sec., the default is stored in %config).
# Returns the result of the first port that is not ok, or 0 when all are:
#   0 ok, 1 failed, 2 timeout, anything else is what the plugin returned.
# Without any ports and for unknown checklevels the result is 1.
sub checkservice {
  my ($host, $ports, $service, $chklvl) = @_;
  my $check;
  $chklvl = $chklvl || "s";

  # Both the plugin and the portscan use ctimeout, so that is the option
  # that needs the default: alarm() without a value means no timeout at all.
  unless (defined $config{'ctimeout'}) { $config{'ctimeout'} = 10; }

  my $plugin = "$config{pluginpath}/check/$service.plugin";

  foreach my $port (@$ports) {
    my $rval = 1;

    # If checklevel is eXtended and plugin exist for that service,
    # use plugin to scan.
    if ($chklvl eq "x" and -x $plugin) {
      $rval = system("$plugin -h $host" .
                     " -t $config{ctimeout} -p $port > /dev/null 2>&1");
      # system() returns the exit code times 256; the remainder of a
      # division by 255 turns that into the exit code again.
      $rval %= 255;
    }
    # If checklevel is Simple or is eXtended, but plugin didn't exist,
    # do a simple portscan.
    elsif ($chklvl eq "s" or $chklvl eq "x") {
      eval {
        local $SIG{'ALRM'} = sub { $rval = 2; die; };

        alarm $config{'ctimeout'};
        $rval = 0 if defined
          IO::Socket::INET->new(PeerAddr => "$host:$port", Proto => "tcp");
        # Switch the alarm off while our own handler is still installed.
        alarm 0;
      };
      alarm 0;
    }

    $check ||= $rval;
  }

  return defined $check ? $check : 1;
}

# << END >>


# syntax highlighted by Code2HTML, v. 0.9.1