> I agree that your script is nice and simple, and hence less prone to
> errors. I coded mine in c++ because I use it not only for a machine
> type watchdog, but also a task based watchdog that reboots the machine
> based on certain tasks living or not. Each task has to register with
> the watchdog server and continually tell the server they're alive, or
> reboot! But that's a story for another thread...
#!/path/to/perl
#
# Process-list watchdog.
#
# Holds /dev/watchdog open and pokes it once per cycle, but only while
# every command named in @watchz shows up in the process table.  If a
# watched process is missing the poke is skipped and the fact is
# logged; enough consecutive skipped pokes let the watchdog timer
# expire and bounce the box.

use strict;
use warnings;

use IO::Handle;
use Sys::Syslog;

open my $fh, '>', '/dev/watchdog'
    or die "/dev/watchdog: $!";

# the device has to see every poke right away: left buffered, the
# one-byte writes would just pile up inside perl until the timer
# had already expired.

$fh->autoflush( 1 );

# if any of these go away we need to notice it.
# ok... you'll notice the first one anyway.
#
# entries are compared against the first word of each command
# line reported by ps, so spell them the way ps shows them.

my @watchz
= qw
(
    init
    ntpd
    apache
    /opt/sybase/ASE-12_5/bin/dataserver
);

# wd timeout / 2, or 1 for minimum sleep
# (avoid usleep: too much overhead).

my $cycle = 15;

# get the syslog handle; string facility/priority names are
# accepted by Sys::Syslog, so no LOG_* macros need importing.

openlog( 'watchdog', 'pid', 'daemon' )
    or die 'Et tu, syslog?';

CYCLE:
for( ;; )
{
    # sleep to the next multiple of $cycle on the clock,
    # i.e. somewhere between 1 and $cycle seconds.

    sleep ( $cycle - ( time % $cycle ) );

    # ps options vary by O/S, this works on linux (procps):
    # -e lists every process (a bare "a" only lists the ones
    # attached to a terminal, which excludes daemons), and
    # "-o args=" prints only the command line, without a header.
    # split ' ' ignores leading whitespace and the trailing
    # newline; the first word is the command itself.

    my @procz
    = grep { defined }
      map  { ( split ' ', $_ )[0] }
      qx( ps -e -o args= );

    # start with every watched name, knock out the ones that
    # are running; whatever is left over is missing.

    my %chechz = ();

    @chechz{ @watchz } = ();

    delete @chechz{ @procz };

    if( %chechz )
    {
        # oops, current proc's don't include the
        # list of processes being watched.
        #
        # this can happen twice in a w/d interval
        # before the system goes down.

        my $nastygram
        = join ' ', q{Missing proc's:}, sort keys %chechz;

        # message goes in as an argument, not as the format,
        # so a '%' in a command name cannot be misread.

        syslog( 'crit', '%s', $nastygram );

        next CYCLE;

        # alternative here is to close $fh here and
        # bounce the system immediately, the
        # approach of looping allows an
        # intentional restart of the service
        # (in less than 1 w/d cycle) w/o bouncing the box.
    }

    # if the proc check got this far then the w/d
    # file gets poked and we live for another loop.

    print {$fh} "\n"
        or syslog( 'crit', '/dev/watchdog write failed: %s', $! );
}

# this isn't a module

0
__END__
--
Steven Lembark 85-09 90th St.
Workhorse Computing Woodhaven, NY, 11421
lembark@wrkhors.com +1 888 359 3508
--
gentoo-user@lists.gentoo.org mailing list