#!/usr/local/bin/perl
#
# Checkservice by Paul van Tilburg <paul@linvision.com>
#
# Syntax:
# checkservice [-t targethost] [-c cfgdir] [-l logdir|-] [-m] [-v [short]] [-s]
#
# Checks ports (services) on servers for their status according to configfiles
# in <cfgdir>/hosts/. Global config file is <cfgdir>/checkservice.conf
# (the default for <cfgdir> is /etc/checkservice).
#
# Reports for each file (filename represents host that should be scanned) to
# logfiles for every host in <cfgdir>/hosts/ or just one host if <host>
# is given. Logfiles can be found in <logdir>. If logdir-option is omitted or
# the value is '-', Checkservice will print to STDOUT. Special output per
# host can be generated with the -m option, Checkservice will print a 0
# (host <host> and all its services are up) or a 1.
# <<INITIALIZATION>>
use POSIX;
use strict;
use CS::Config;
use CS::Functions;
use IO::Socket;
use File::Find;
use Mail::Send;
use Getopt::Long;
my $program = "Checkservice";
my $version = "1.2.0";
# Short program name, used as the prefix of every diagnostic.
# It is taken from the word that follows the last '/' in $0.  When $0 has
# no directory part at all (e.g. the script was started as
# `perl checkservice`) that pattern cannot match, so fall back to the first
# word of $0 and finally to $0 itself rather than leaving the prefix
# undefined.
my ($p) = $0 =~ /\S*\/(\w+)/;
($p) = $0 =~ /(\w+)/ unless defined $p;
$p = $0 unless defined $p;
# Reference to the hash holding all service data of the host that is
# currently being processed; assigned in main() for every host.
my $hostc;
# Usage text, printed when help is requested or option parsing fails.
my $help_info = <<EOT;
$program $version checks certain services on certain hosts. It's equipped with
various warning methods and several ways for reporting and logging.
Usage: checkservice [OPTIONS]...
-t, --targethost=STR Specify host to scan, a configfile for that host must
exist in <configdir>/hosts/. If STR is '*' all
hosts will be scanned for which configfiles are
available. If targethost is grouped, the grouppath
must be provided too.
-c, --configdir=PATH Specify configdir, (default: /etc/checkservice).
-l, --logdir=PATH Logdirectory where log should be kept. If PATH is
'-', $program will print to stdout.
-m, --mrtg Generate only a 0 (host is up & all services are
ok) or 1 to stdout. Using this option, logdir
specification will be ignored.
NB. the targethost option is compulsory!
-s, --status Display status information about available
plugins and hosts-configuration.
-h, --help Show this help and exit.
-v, --version=MODE Show version (if MODE is 'short', only version
number will be printed) and exit.
EOT
# >> Read command line parameters.
# Defaults: scan every configured host, use the standard config directory
# and report to STDOUT.
my %params = (targethost => '*', cfgdir => '/etc/checkservice', logdir => '-');
# 'configdir' is an alias of 'cfgdir' (values are still stored under the
# key 'cfgdir'); the explicit 'c' keeps the short form -c unambiguous now
# that two long names start with that letter.
GetOptions(\%params, qw(cfgdir|configdir|c=s targethost=s logdir=s
                        status! help! mrtg! version:s))
    or die $help_info;
# If --version|-v is provided, show version and exit!
# With 'short' as value, only the version number is shown (for the
# php-statuspage).  The value is optional, so a bare -v stores an empty
# string: test for definedness, not for truth.
if (defined $params{'version'}) {
    if ($params{'version'} eq "short") {
        print "$version\n";
    }
    else {
        print "Linvision $program version $version by Paul van Tilburg "
            . "<paul\@linvision.com>.\n";
    }
    exit 0;
}
# Show help and exit!  Help that was asked for is not an error, so it is
# printed to STDOUT and the program ends with a zero exit status.
if ($params{'help'}) {
    print $help_info;
    exit 0;
}
# >> Read and parse global configfile.
# Every "key = value" line of <cfgdir>/checkservice.conf is stored in
# %config.  Everything from a '#' to the end of the line is dropped.  A
# missing or unreadable file is not an error: %config simply stays empty.
my %config = ();
(! -d "$params{cfgdir}") and
    die "$p: Can't find config dir ($params{cfgdir})\n";
if (-r "$params{cfgdir}/checkservice.conf") {
    open my $conf_fh, '<', "$params{cfgdir}/checkservice.conf" or
        die "$p: Can't open global configfile "
          . "'$params{cfgdir}/checkservice.conf'\n";
    while (my $line = <$conf_fh>) {
        chomp $line;
        $line =~ s/#.*//;
        # The value ends at its last non-blank character.  Blanks that are
        # left behind by a stripped trailing comment must not become part
        # of the value, or string comparisons against it would fail.
        if (my ($key, $value) = $line =~ /^\s*(\S+)\s*=\s*(\S.*?)\s*$/) {
            $config{$key} = $value;
        }
    }
    close $conf_fh;
}
# Show status and exit, if enabled
if ($params{'status'}) { show_status(); }
# Do some path checking.
(! -d "$config{lockpath}") &&
    die "$p: Can't find or open directory for lockfiles: $config{lockpath}!\n";
(! -d "$config{cachepath}") &&
    die "$p: Can't find or open directory for caching: $config{cachepath}!\n";
# The directory with the host configfiles is needed in every mode, so it is
# checked on its own instead of only when the log directory is missing too.
(! -d "$params{cfgdir}/hosts") and
    die "$p: Can't find dir for hostfiles ($params{cfgdir}/hosts)!\n";
# A log directory is only needed when logging to files: '-' selects STDOUT
# and the mrtg option ignores the log directory.
if ($params{'logdir'} ne "-" && !$params{'mrtg'} && ! -d "$params{logdir}") {
    die "$p: Can't find dir for logfiles ($params{logdir})!\n";
}
# >> Read and parse host-specific configfiles.
my @conffiles = ();
$CS::Config::root = $params{'cfgdir'};
# Determine if all (default) or just one host should be checked.
if ($params{'targethost'} eq "*") {
    # Collect every regular file below <cfgdir>/hosts whose name starts with
    # a word character.  A NUL byte is inserted in front of the file name;
    # main() removes it again after sorting.  Presumably it only serves as a
    # sort key, so that the files of a directory sort before the contents of
    # its subdirectories.
    finddepth sub {
        push @conffiles, "$File::Find::dir/\x00$_" if (-f && /^\w/);
    }, "$params{cfgdir}/hosts";
}
else {
    # A single host was requested: its configfile must be readable.
    (! -r "$params{cfgdir}/hosts/$params{targethost}") and
        die "$p: Couldn't read/find configfile for host "
          . "'$params{targethost}': $!\n";
    push @conffiles, "$params{cfgdir}/hosts/$params{targethost}";
}
main(); # Start the main()-sub!
# << MAIN >>
#
# Processes every collected host configfile and reports the results in one
# of three ways:
#   * mrtg mode:    a single "0" or "1" line on STDOUT,
#   * logging mode: one line per service appended to <logdir>/<host>, with
#                   warnings being sent, plus (optionally) the web cache,
#   * stdout mode:  a "host: ... endhost:" block per host on STDOUT.
# Afterwards the failure cache is trimmed and the program exits with
# status 0; this sub never returns.
sub main {
    # True when results are appended to logfiles (which enables warnings).
    my $logging = (!$params{'mrtg'} && $params{'logdir'} ne '-') ? 1 : 0;
    # The web cache is only written in logging mode.  The mrtg option
    # ignores the log directory, so it must not open (and thereby truncate)
    # the cache either.
    my $wcache = ($logging && $config{'webcache'} eq "yes") ? 1 : 0;

    # Target of the block-style report: the web cache or STDOUT.  STDOUT is
    # duplicated instead of opening /dev/stdout by name, which does not
    # exist on every platform.
    my $report;
    if ($wcache) {
        my $cache = "$config{cachepath}/web.cache";
        open $report, '>', $cache or
            die "$p: Can't open $cache for writing: $!\n";
    }
    else {
        open $report, '>&', \*STDOUT or
            die "$p: Can't open STDOUT for writing: $!\n";
    }

    # The local host name only appears in logfile lines; ask for it once.
    my $localhost = '';
    if ($logging) {
        $localhost = `hostname`;
        chomp $localhost;
    }

    # Start processing every host-specific configfile!  The NUL byte that
    # was added as a sort key is removed again after sorting.
    foreach my $cfile (map { tr/\x00//d; $_ } sort @conffiles) {
        chomp $cfile;
        # Path of the configfile relative to <cfgdir>/hosts, split in parts.
        # NOTE(review): for a grouped configfile (hosts/<group>/<host>) this
        # takes the FIRST component as the host name -- confirm that this is
        # what checkhostc()/gethostc() expect.
        my @path = split('/', substr $cfile, (length "$params{cfgdir}/hosts") + 1);
        my $host = shift @path;

        # Read config file for $host.
        if (my $msg = checkhostc(@path, $host)) {
            print STDERR "$p: parse error in configfile of host '$host'" .
                (@path ? " (group: " . join('/', @path) . ")" : '') .
                ": '$msg', skipping...\n";
            next;
        }
        $hostc = gethostc(@path, $host);
        my @services = sort keys %{$hostc->{'services'}};

        # Print to stdout in simple format (shell-style return values):
        #   0 <-- host is up, all services up
        #   1 <-- host down or one or more services are down
        if ($params{'mrtg'}) {
            $params{'targethost'} eq "*" &&
                die "$p: Specific host must be provided, use '$p -t <host> -m'\n";
            my $check = 0;
            # host_up() returns the ping exit status: true means unreachable.
            if ($config{'downcritical'} eq 'yes' && host_up($host)) {
                $check = 1;
            }
            else {
                foreach my $name (@services) {
                    my %data = %{$hostc->{'services'}->{$name}};
                    $check = 1 if checkservice($host, $data{'ports'},
                        $data{'shortname'}, $data{'checktype'});
                }
            }
            # scalar() is required: in list context localtime would be
            # printed as a run of concatenated numbers.
            print $check, " (", scalar(localtime), ")\n";
        }
        # Logging to logfile is selected, this enables warnings!
        # There are three warning systems: beep, sms and mail.
        elsif ($logging) {
            open my $log, '>>', "$params{logdir}/$host" or
                die "$p: Can\'t open $params{logdir}/$host: $!\n";
            if ($config{'downcritical'} eq "yes" and host_up($host)) {
                print {$log} strftime("%b %e %H:%M:%S", localtime);
                print {$log} " $localhost checkservice: error:"
                    . " host $host is unreachable.\n";
                warning("$params{cfgdir}/unreachable.mail", 2, $host, "-", "-");
            }
            else {
                # The host answers again: drop its "unreachable" lockfile.
                (-f "$config{lockpath}/$host.-") &&
                    unlink "$config{lockpath}/$host.-";
                foreach my $name (@services) {
                    my %data = %{$hostc->{'services'}->{$name}};
                    my $chk = checkservice($host, $data{'ports'},
                        $data{'shortname'}, $data{'checktype'});
                    print {$log} strftime("%b %e %H:%M:%S", localtime),
                        " $localhost checkservice: service ",
                        $data{'shortname'}, ": ";
                    if ($chk == 1) {
                        print {$log} "failed.\n";
                        warning("$params{cfgdir}/failed.mail", $chk, $host,
                            $data{'shortname'}, join(',', @{$data{'ports'}}),
                            $data{'action'});
                    }
                    elsif ($chk == 2) {
                        print {$log} "timeout.\n";
                        warning("$params{cfgdir}/timeout.mail", $chk, $host,
                            $data{'shortname'}, join(',', @{$data{'ports'}}),
                            $data{'action'});
                    }
                    elsif ($chk == 3) {
                        print {$log} "plugin error.\n";
                    }
                    elsif ($chk == 0) {
                        print {$log} "ok.\n";
                        # A lockfile means the service was reported as down
                        # before: announce that it is back and clean up.
                        if (-f "$config{lockpath}/$host.$data{shortname}") {
                            warning("$params{cfgdir}/successful.mail", $chk, $host,
                                $data{'shortname'}, join(',', @{$data{'ports'}}),
                                $data{'action'});
                            unlink "$config{lockpath}/$host.$data{shortname}";
                        }
                    }
                    else {
                        print {$log} "unknown ($chk).\n";
                    }
                }
            }
            close $log;
        }

        # Print to stdout (or the web cache)!  Delivers for every host a
        # block, blockdefinition:
        #   host:<host>
        #   ...
        #   error:<message> ||
        #   service:<portset>:<short servicename>:<checklevel>:<longservicename>
        #   ...
        #   endhost:
        if (($params{'logdir'} eq "-" or $wcache) && ! $params{'mrtg'}) {
            print {$report} "host:", join('/', @path), "$host\n";
            if ($config{'downcritical'} eq "yes" && host_up($host)) {
                print {$report} "error:Host $host is unreachable!\n";
            }
            else {
                my @resultn = ("ok", "failed", "timeout", "plugin error");
                foreach my $name (@services) {
                    my %data = %{$hostc->{'services'}->{$name}};
                    my $chk = checkservice($host, $data{'ports'},
                        $data{'shortname'}, $data{'checktype'});
                    my $perc = percentage($host, $data{'shortname'});
                    my $state = ($chk <= $#resultn) ? $resultn[$chk]
                                                    : "unknown ($chk)";
                    print {$report} "service:", join(",", @{$data{'ports'}}),
                        ":$data{shortname}:$data{checktype}:$data{longname}:";
                    print {$report} $state, ":";
                    if ($perc ne "n/a") { printf {$report} "%3.6f\n", $perc; }
                    else { print {$report} "n/a\n"; }
                }
            }
            print {$report} "endhost:\n";
        }
    }
    # Buffered write errors only show up when the handle is closed.
    close $report or
        print STDERR "$p: error while closing the report output: $!\n";
    tail_failcache();
    exit 0;
}
# <<HELPER SUBROUTINES>>
# Comparison routine for sort(): numeric, ascending.
# Reads the package variables $a and $b like every sort comparator and
# yields -1, 0 or 1 for "smaller", "equal" and "greater".
sub comp_num {
    return $a <=> $b;
}
# Displays Checkservice's own status.
# For now this is a placeholder: it only tells the user that the feature
# does not exist yet and then ends the program with exit status 0, so it
# never returns to its caller.
sub show_status {
    my $notice = "Checkservice: this feature is not yet implemented!\n";
    print $notice;
    exit 0;
}
# Pings a host to find out whether it is reachable.
#
# Argument: the host name or address to ping.
# The number of pings comes from the 'hostping' option of the global
# configfile; when that option is not set it is fixed to 2 (and stored in
# %config).
#
# Returns the raw status of system(), i.e. 0 when ping succeeded and a
# non-zero value otherwise.  Mind the sense of the result: a TRUE value
# means that the host is NOT reachable.
sub host_up {
    my ($host) = @_;
    $config{'hostping'} = 2 unless defined $config{'hostping'};
    my $count = $config{'hostping'};
    # Ask the kernel for the system name instead of running `uname -s` in a
    # subshell on every call; both report the same name.
    my ($system) = POSIX::uname();
    # Attempt to increase portability! If I missed one, please let me know.
    my $command;
    if ($system =~ /SunOS/) {
        $command = "ping -s $host 56 $count";
    }
    elsif ($system =~ /HP-UX/) {
        $command = "ping $host -n $count";
    }
    else {
        $command = "ping -c $count $host";
    }
    return system("$command > /dev/null 2>&1");
}
# Trims the failure cache (<cachepath>/failures.cache) to its last lines.
# The number of lines that is kept comes from the 'failureslen' option of
# the global configfile (default 5, stored in %config when unset).
#
# Does nothing when the log directory is not an existing directory, which
# includes the STDOUT setting '-'.  A cache that cannot be read is treated
# as empty, so an empty cache file is created in that case.
# Dies when the cache cannot be written.
sub tail_failcache {
    return if ! -d $params{'logdir'};
    my $cache = "$config{cachepath}/failures.cache";

    # Read the current contents; the handle is closed again right away
    # instead of staying open for the rest of the run.
    my @lines;
    if (open my $in, '<', $cache) {
        @lines = <$in>;
        close $in;
    }

    $config{'failureslen'} = 5 unless defined $config{'failureslen'};
    my $keep = $config{'failureslen'};
    # Index of the first line to keep; everything is kept when the cache is
    # not longer than the limit.
    my $start = ($keep > $#lines) ? 0 : ($#lines - $keep + 1);

    open my $out, '>', $cache or
        die "$p: Couldn't open $cache for writing: $!\n";
    print {$out} @lines[$start .. $#lines];
    close $out or
        die "$p: Couldn't write $cache: $!\n";
}
# Returns the smaller of its two numeric arguments; when both are equal the
# second one is returned.
sub min {
    my ($first, $second) = @_;
    return $first if $first < $second;
    return $second;
}
# Generate mail warning. This is the builtin mailwarning plugin; warning()
# only calls it when no executable mail.plugin is installed.
#
# Arguments: template file, host, service, portset, action, reference to
# the output lines of the action (may be undefined), result of the check.
#
# A lockfile <lockpath>/<host>.<service> prevents a mail on every check:
# its first line holds the time it was created and its modification time
# is the time of the last mail.  A mail is sent when the lockfile did not
# exist yet, when the result is 0 (false), or when the last mail is older
# than the 'repeatmailw' option.
# Recipients are the 'mailto' option of the global configfile (separated by
# ':') plus the 'mailto' list of the host-specific configfile.
# Dies when the lockfile cannot be created or read.
sub mail_warning {
    my @wmethods = split(":", $config{'wmethod'});
    my ($template, $host, $service, $ports, $action, $aoutput, $result) = @_;
    my $lockf = "$config{lockpath}/$host.$service";
    my $crflag = 0;             # set when the lockfile was created just now
    my ($ptime, $mtime);

    if (! -r $lockf) {
        # 'or', not '||': the latter would bind to the file name and the
        # failure would never be noticed.
        open my $lock, '>', $lockf or
            die "$p: Could not create lockfile $lockf: $!\n";
        my $stamp = $result ? time : 0;
        print {$lock} "$stamp\n";
        close $lock or
            die "$p: Could not create lockfile $lockf: $!\n";
        $crflag = 1;
        $ptime = time;
    }
    else {
        open my $lock, '<', $lockf or
            die "$p: Could not read lockfile $lockf: $!\n";
        $ptime = <$lock>;
        close $lock;
        chomp $ptime;
        $mtime = (stat $lockf)[9];
    }

    # Send mail if lockfile hasn't been touched longer than given
    # repeatmailw-interval or didn't exist yet.
    return unless (!$result or $crflag
        or $mtime < (time - $config{'repeatmailw'}));

    my $Subject = "Checkservice warning";
    my @Contents;
    my $ctime = POSIX::ctime($ptime);
    chomp $ctime;
    # Remember when this mail went out.
    utime time, time, $lockf;

    # A template is used for the mail that will be sent, substitutions in
    # template:
    #   %t -> time server/service went down.
    #   %h -> host that went down/host from which a service went down.
    #   %s -> service that went down (none if whole server went down).
    #   %p -> portset defined for service.
    my %substs;
    @substs{'%t', '%h', '%s', '%p'} = ($ctime, $host, $service, $ports);

    my @mailto = split(':', $config{'mailto'});
    push @mailto, @{ $hostc->{'mailto'} || [] };

    if (open my $tmpl, '<', $template) {
        while (my $line = <$tmpl>) {
            foreach my $subst (keys %substs) {
                $line =~ s/$subst/$substs{$subst}/g;
            }
            # A "Subject:" line sets the subject and is not part of the body.
            if ($line =~ /^Subject:\s(.*)$/) { $Subject = $1; }
            else { push @Contents, $line; }
        }
        close $tmpl;
        # If action warning was enabled, the output of the action will be
        # appended to the mail.  There is no output at all (undef) when the
        # action was not run, so test the reference before using it.
        if (absgrep(@wmethods, "action") and $aoutput and @$aoutput) {
            push @Contents, "\nPS. Output of action '$action':\n";
            foreach my $line (@$aoutput) { push @Contents, "] $line"; }
        }
    }
    else {
        push @Contents, "Could not open mail template '$template': $!\n\n"
            . ">> Service $service on $host:$ports failed "
            . "on $ctime!\n";
    }

    unless (@mailto) {
        print STDERR "$p: no mail recipients configured for host '$host',"
            . " warning not sent.\n";
        return;
    }
    # The recipients are handed over with to(): written as "To => @mailto"
    # in the constructor, every address after the first would be taken for
    # an attribute name.
    my $Mail = Mail::Send->new(Subject => $Subject);
    $Mail->to(@mailto);
    my $M = $Mail->open();
    print {$M} @Contents;
    $M->close();
}
# Try to run a defined action, capture the output and return that.
# The command is run through the shell with STDERR merged into STDOUT and
# is killed by an alarm when it takes longer than the 'atimeout' option
# (default 20 sec., stored in %config when unset).
#
# Argument: the command line of the action; undefined or false means that
# there is nothing to run.
# Returns a reference to the list of output lines (empty when nothing was
# run).  When the action did not end successfully, one extra line is added
# that tells what happened.
sub do_action {
    my ($command) = @_;
    my @output;
    return \@output unless $command;

    $config{'atimeout'} = 20 unless defined $config{'atimeout'};

    # Forking open: returns the child's pid in the parent, 0 in the child
    # and undef when no child could be created.
    my $pid = open my $pipe, '-|';
    if (!defined $pid) {
        # Without this check the parent itself would exec the action.
        push @output, "--- Could not start action: $! ---";
        return \@output;
    }
    if ($pid == 0) {
        # Child: the pending alarm survives the exec and ends the command.
        alarm $config{'atimeout'};
        # Never fall back into the code of the parent when exec fails.
        exec("$command 2>&1") or POSIX::_exit(127);
    }

    @output = <$pipe>;
    if (!close $pipe) {
        my $signal = $? & 127;
        if ($signal == POSIX::SIGALRM()) {
            push @output,
                "--- Action timed out after $config{atimeout}s and died ---";
        }
        elsif ($signal) {
            push @output, "--- Action was killed by signal $signal ---";
        }
        elsif ($?) {
            push @output, "--- Action exited with status " . ($? >> 8) . " ---";
        }
        else {
            push @output, "--- Could not close action pipe: $! ---";
        }
    }
    return \@output;
}
# Launch warning subroutines for each warning method enabled in global
# configfile (the 'wmethod' option, methods separated by ':').
#
# Arguments: mail template, result of the check, host, service, portset and
# the action to run.
# Nothing happens for a result of 0 (false) unless the 'back_up' option is
# enabled.  For any other result the failure is recorded in the failure
# cache (only when no lockfile for this host and service exists yet) and
# the action is run.
# For every method an executable <pluginpath>/warning/<method>.plugin is
# run when it exists; only the method 'mail' has a builtin fallback.
sub warning {
    my @wmethods = split(":", $config{'wmethod'});
    my ($template, $result, $host, $service, $ports, $action) = @_;
    return unless ($result or $config{'back_up'} =~ /yes|y/i);

    my $Output;
    if ($result) {
        reportfail($host, $service, $ports)
            unless -f "$config{lockpath}/$host.$service";
        $Output = do_action($action);
    }

    foreach my $w (@wmethods) {
        my $plugin = "$config{pluginpath}/warning/$w.plugin";
        if (-x $plugin) {
            # One list element per word: in the list form of system() a
            # single string holding all options would reach the plugin as
            # one argument.
            system($plugin,
                '-c', $params{cfgdir},
                '-h', $host,
                '-p', $ports,
                '-s', $service,
                '-r', $result);
        }
        elsif ($w eq 'mail') {
            mail_warning($template,
                $host, $service, $ports, $action, $Output, $result);
        }
    }
}
# Calculates uptime percentage for a service by parsing a logfile.
# Calculation is done by taking all entries from the current month
# and dividing the 'ok'-entries by the total number of entries.
# Thus at the end of a month the uptime calculation is the most accurate.
#
# Arguments: host (the logfile is <defaultlogdir>/<host>) and the short
# name of the service.
# Returns the percentage as a number, or the string "n/a" when the logfile
# cannot be read or holds no entry for the service in the current month.
sub percentage {
    my ($host, $service) = @_;
    my ($succ, $cnt) = (0, 0);
    my $month = POSIX::strftime('%b', localtime);

    open my $log, '<', "$config{defaultlogdir}/$host" or return "n/a";
    while (my $line = <$log>) {
        chomp $line;
        # \Q...\E: the service name is literal text, not a pattern.
        if (my ($res) = $line =~
            /$month\s*\d+\s.*\s.*\scheckservice:\sservice\s\Q$service\E:\s(.*)\./) {
            $succ++ if $res eq "ok";
            $cnt++;
        }
    }
    close $log;

    return $cnt ? (100. * $succ) / $cnt : "n/a";
}
# Appends one record to the failure cache (<cachepath>/failures.cache):
#   <host>:<service>:<ports>:<kind>:<time>
# where <kind> is "unreachable" when the service is "-" and "unavailable"
# otherwise, and <time> is the current epoch time.
# Dies when the cache cannot be opened.
sub reportfail {
    my ($host, $service, $ports) = @_;
    my $cache = "$config{cachepath}/failures.cache";
    my $kind = ($service eq "-") ? "unreachable" : "unavailable";
    open my $fails, '>>', $cache or
        die "$p: Couldn't open $cache for writing: $!\n";
    print {$fails} "$host:$service:$ports:$kind:", time, "\n";
    close $fails;
}
# Does the actual checking! It tries to find a plugin for the service first.
#
# Arguments: host, reference to the list of ports, short service name and
# check level ("s" simple, "x" extended; "s" when empty).
# Every port is checked in turn:
#   * level "x" with an executable <pluginpath>/check/<service>.plugin:
#     the plugin is run and its exit status is the result; a plugin that
#     could not be run or was killed by a signal counts as 3,
#   * level "s", or "x" without plugin: a TCP connect; 0 when it succeeds,
#     1 when it fails and 2 when it takes longer than the timeout,
#   * any other level: 1.
# The timeout is the 'ctimeout' option; when it is not set, the 'timeout'
# option or else 10 seconds is used (and stored in %config).
#
# Returns the first non-zero result, 0 when every port was fine, and 1
# when the list of ports is empty.  The callers in this file treat the
# result as 0 ok, 1 failed, 2 timeout, 3 plugin error.
sub checkservice {
    my ($host, $ports, $service, $chklvl) = @_;
    my $check;
    $chklvl = $chklvl || "s";

    # Both kinds of check use 'ctimeout', so that is the option which needs
    # a default; without one no alarm would be armed at all.
    $config{'timeout'} = 10 unless defined $config{'timeout'};
    $config{'ctimeout'} = $config{'timeout'}
        unless defined $config{'ctimeout'};

    foreach my $port (@$ports) {
        my $rval = 1;
        my $plugin = "$config{pluginpath}/check/$service.plugin";
        # If checklevel is eXtended and plugin exist for that service,
        # use plugin to scan.
        if ($chklvl eq "x" and -x $plugin) {
            my $status = system("$plugin -h $host" .
                " -t $config{ctimeout} -p $port > /dev/null 2>&1");
            if ($status == -1 or ($status & 127)) {
                $rval = 3;
            }
            else {
                # The exit status lives in the high byte.  Taking the wait
                # status modulo 255 instead would turn exit status 255
                # into 0, i.e. into "ok".
                $rval = $status >> 8;
            }
        }
        # If checklevel is Simple or is eXtended, but plugin didn't exist,
        # do a simple portscan.
        elsif ($chklvl eq "s" or $chklvl eq "x") {
            eval {
                local $SIG{'ALRM'} = sub { $rval = 2; die "timeout\n"; };
                alarm $config{'ctimeout'};
                $rval = 0 if defined
                    IO::Socket::INET->new(PeerAddr => "$host:$port",
                                          Proto => "tcp");
                alarm 0;
            };
            # Also disarm the alarm when the eval block was left early.
            alarm 0;
        }
        $check ||= $rval;
    }
    return defined $check ? $check : 1;
}
# << END >>
# syntax highlighted by Code2HTML, v. 0.9.1