#!/usr/bin/perl -w
#
# Run some tests.
#
# Copyright (C) 2000-6 Pete Wyckoff <pw@osc.edu>
#
# $Id: runtests.pl 388 2006-11-27 17:09:48Z pw $
#
use strict qw(subs refs);
use English;
#
# If "qsub" is not your qsub program, change $qsub to include the explicit
# path. You can also add other qsub options here too.
#
$qsub = "qsub";
#
# If the backend nodes don't mount this build directory, or they mount
# it under a different name, you'll have to change $dir to suit them.
# If the directory is not available, copy the mpiexec and hello codes
# somewhere the compute nodes can get to them, and point $dir there.
#
$dir = `pwd`; chomp($dir);
#
# This script assumes that it will be able to get $available_nodes nodes and
# $smpsize processors per node from pbs during the testing phase. Change
# these numbers if this won't work for your site.
#
$available_nodes = 4;
$smpsize = 2;
#
# If your PBS configuration has time-shared nodes (generally only for largish
# SMPs), you should set this to 1 which tells the script to request, e.g.
# "ncpus=2" instead of "nodes=4:ppn=2". In this case, $available_nodes above
# is ignored, only 1 node is used. Node properties (below) are also ignored
# when this is on.
#
#$use_ncpus = 1;
#
# Any special node properties? This string will get appended to the
# nodes line, separated by a colon, e.g.: -l nodes=2:ppn=2:amd
# If your PBS scheduler is clever and tries to run single-node jobs
# on hardware that does not have a fancy network card, the job may fail
# when it tries to open the network card; add the right property here.
#
#$nodeprops = "amd";
#$nodeprops = "myrinet";
#$nodeprops = "ib";
#$nodeprops = "gige";
#
# If, for testing purposes, you want to pass some arguments to mpiexec
# every time, put them here. Perhaps specify a communication library
# that is different from the compiled-in default.
#
#$mpcommargs = "--comm=pmi";
#
# Similarly, if you need to set up your environment or load modules or
# whatever, do it in this line that will be run immediately before the
# mpiexec test line, once in every batch script.
#
#$mpenvsetup = "mod load mpich-p4";
#
# If you know that it is not possible in your configuration to
# use your default shell, so you want a permanent "#PBS -S /bin/sh"
# line in every script, put it here. This also disables the
# various shell tests. Set it to "" to use the user's default shell
# and still disable testings all the various shells.
#
#$fixed_shell = "/bin/sh";
$fixed_shell = "";
#
# No need to change below here, hopefully.
#
$testlines = 5; # number of lines to feed to hello across stdin
$sleep_interval = 1.0; # seconds between dots during wait
# initialize possibly empty variables
$nodeprops = "" unless defined($nodeprops);
$mpcommargs = "" unless defined($mpcommargs);
$use_ncpus = 0 unless defined($use_ncpus);
#
# Grep through an output file. Set this flag before the run if you
# only want to count the lines. Problems with pbs_demux mixing up the
# output prevents testing the actual contents of each line reliably in
# the case of -nostdout.
#
$lookhello_count_only = 0;
sub lookhello($$$) {
my ($file, $lines_expected, $file_description) = @_;
my $lines = 0;
if (!open(OUT, $file)) {
print "Missing $file_description output file: $file.\n";
return;
}
while (<OUT>) {
++$lines;
if ($lookhello_count_only) {
next;
}
if (!/^hello /) {
if (defined($ignore_regex) && /$ignore_regex/) {
--$lines; # explicitly requested this line be ignored
} else {
print "File $file: unexpected line: $_";
}
}
}
close(OUT);
if ($lines != $lines_expected) {
print "File $file: got $lines lines, expected $lines_expected.\n"
}
}
#
# Run one test. Verify results.
# args:
# args to mpexec
# args to hello
# lines in mpiexec joined stdout/err
# lines in pbs joined stdout/err
#
$iter = 1;
sub run($$$$) {
my ($mpargs, $helloargs, $expectmp, $expectpbs) = @_;
## debugging
#$mpargs .= " -v";
my $suffix = sprintf("%d.%02s", $$, $iter);
my $scrfile = "testqs.$suffix";
my $outfile = "testqo.$suffix";
my $helloout = "testho.$suffix";
open(TMP, ">$scrfile");
print TMP "#!/bin/sh\n";
my $str;
if ($use_ncpus > 0) {
$str = "#PBS -l ncpus=$smpsize"; # nodes is always 1
} else {
$str = "#PBS -l nodes=$nodes:ppn=$smpsize";
if (length($nodeprops)) {
$str .= ":$nodeprops";
}
}
print TMP "$str\n";
print TMP "#PBS -l walltime=5:00\n";
print TMP "#PBS -l cput=", 5 * $nodes * $smpsize, ":00\n";
print TMP "#PBS -j oe\n";
print TMP "#PBS -o $outfile\n";
if (length($shell)) {
print TMP "#PBS -S $shell\n";
}
print TMP "cd $dir\n";
if (defined($mpenvsetup) && $mpenvsetup =~ /./) {
print TMP "$mpenvsetup\n";
}
if (defined($stdin_no_pipe)) {
print TMP $stdin_no_pipe;
} elsif (defined($stdin_gets)) {
print TMP "echo $stdin_gets | ";
} else {
# What is a good way to do this? brooks@aero.org pointed out a
# nice way to use perl, but it may not be on the compute nodes.
# echo -e is not on bsd. seq, jot are non-standard.
print TMP "echo '[1+pd$testlines>x]sx0lxxq' | dc | ";
}
print TMP "./mpiexec $mpcommargs $mpargs";
if ($mpargs !~ /-config/) {
print TMP " hello $helloargs";
}
print TMP " > $helloout 2>&1\n";
close(TMP);
my $qsub_res;
my $qsub_errfile = "/tmp/mpiexec-runtests-qsub.err";
for (;;) {
unlink($qsub_errfile);
$qsub_res = `sh -c '$qsub $scrfile 2>$qsub_errfile'`;
if (-s $qsub_errfile) {
print "Runtests.pl: qsub failed: ";
system("cat $qsub_errfile");
unlink($qsub_errfile);
print "Runtests.pl: trying again in 1 second.\n";
sleep(1);
next;
}
unlink($qsub_errfile);
last;
}
# wait for the results
$qsub_res =~ s/\..*\n//;
print "$qsub_res to $outfile mpiexec";
if (length($mpcommargs)) {
print " $mpcommargs";
}
if (length($mpargs)) {
print " $mpargs";
}
print " hello";
if (length($helloargs)) {
print " $helloargs";
}
if (length($shell)) {
print " shell=\"$shell\"";
}
print " ";
for (; ! -f $outfile;) {
print ".";
select(undef, undef, undef, $sleep_interval);
}
print "\n";
# wait a bit for the file contents to completely appear, NFS or copy
select(undef, undef, undef, 2 * $sleep_interval);
lookhello($helloout, $expectmp, "mpiexec stdout");
lookhello($outfile, $expectpbs, "PBS -o output");
# inc iter for next
++$iter;
# let gm and pbs catch and clean up
sleep(1);
}
sub numproc_tests(@)
{
my @testlist = @_;
# -n <numproc>
for my $i (@testlist) {
run("-n $i", "", $i, 0);
}
}
sub pernode_nolocal_tests()
{
if ($smpsize > 1) {
run("-pernode", "", $nodes, 0);
}
if ($nodes > 1) {
run("-nolocal", "", ($nodes - 1) * $smpsize, 0);
run("-nolocal -pernode", "", $nodes - 1, 0);
}
}
#
# stdio tests
#
sub stdio_tests($)
{
my ($count) = @_;
# 1..6 hello should ignore the stdin
run("", "", $count, 0);
run("-nostdin", "", $count, 0);
run("-allstdin", "", $count, 0);
$lookhello_count_only = 1;
run("-nostdout", "", 0, $count);
run("-nostdin -nostdout", "", 0, $count);
run("-allstdin -nostdout", "", 0, $count);
$lookhello_count_only = 0;
# 7..12 hello will try to cat the stdin
run("", "-l", $count + $testlines, 0);
run("-nostdin", "-l", $count, 0);
run("-allstdin", "-l", $count + $count * $testlines, 0);
$lookhello_count_only = 1;
run("-nostdout", "-l", 0, $count + $testlines);
run("-nostdout -nostdin", "-l", 0, $count);
run("-nostdout -allstdin", "-l", 0, $count + $count * $testlines);
$lookhello_count_only = 0;
# 13..18 things should work on stderr too
run("", "-stderr", $count, 0);
run("-nostdin", "-stderr", $count, 0);
run("-allstdin", "-stderr", $count, 0);
$lookhello_count_only = 1;
run("-nostdout", "-stderr", 0, $count);
run("-nostdin -nostdout", "-stderr", 0, $count);
run("-allstdin -nostdout", "-stderr", 0, $count);
$lookhello_count_only = 0;
# 19..24 sleepy hello is slow to exit after it closes all stdio
run("", "-sleep", $count, 0);
run("-nostdin", "-sleep", $count, 0);
run("-allstdin", "-sleep", $count, 0);
$lookhello_count_only = 1;
run("-nostdout", "-sleep", 0, $count);
run("-nostdout -nostdin", "-sleep", 0, $count);
run("-nostdout -allstdin", "-sleep", 0, $count);
$lookhello_count_only = 0;
# closing stdin should work, even if no -nostdin (though silly)
# Note if you configured your torque with --enable-shell-pipe
# (the default), bash will exit when this closes its command
# stream. If there's no .ho file, that's why. Use --enable-shell-argv
# instead.
$stdin_no_pipe = "exec 0<&-\n";
run("", "-l", $count, 0);
undef($stdin_no_pipe);
}
#
# Test what happens when a process dies with segv.
#
sub segv_tests($@)
{
my $count = shift @_;
my @testlist = @_;
# The various MPIs say different things as processes die oddly that we try
# to catch and ignore here. Mpich/p4 catches the segv and does exit(1)
# instead, meaning the others may or may not exit properly. Dumb. Worse
# yet, each task tries to call up all the others and sleeps 2 sec between
# failed attempts. That really scales, yeah. Could add -kill here to avoid
# that issue, although it covers up the mpich problem.
print "Note: mpich1/p4 catches SIGSEGV then tries to contact all other\n";
print "processes, with a 2 second delay after each. If you are using\n";
print "that MPI implementation, these tests could hang until the job\n";
print "walltime (5 min) is reached.\n";
# the selected segvs after 2 sec, rest sleep 6 sec
$ignore_regex = "(died with signal|exited with status|p4_error|net_send)";
for my $i (@testlist) {
run("", "-sleep -segv " . ($i-1), $count, 0);
}
# all do segv before MPI_Init
$ignore_regex = "(died with signal|exited before completing MPI startup|were never spawned)";
run("", "-segvearly", 0, 0);
undef($ignore_regex);
}
#
# Test when a process calls MPI_Abort(). Note that mpich2 (using PMI) does
# not support abort. The other tasks may hang in MPI_Finalize and these
# tests will exit only after the batch system time limit.
#
sub abort_tests($@)
{
my $count = shift @_;
my @testlist = @_;
print "Note: MPI_Abort is not implemented properly in mpich2. If you\n";
print "are using that MPI implementation, these tests will hang until\n";
print "the job walltime (5 min) is reached.\n";
# the given task calls MPI_Abort after 2 sec, while the rest sleep 6 sec
$ignore_regex = "(died with signal|exited with status|p4_error|net_send"
. "|MPI[_ ]Abort|Aborting program|aborting job)";
for my $i (@testlist) {
run("", "-sleep -abort " . ($i-1), $count, 0);
}
}
#
# Check exit value returned to environment.
#
sub exitval_tests($)
{
my $count = shift @_;
for (my $i=0; $i<=2; $i++) {
if ($i > 0) {
$ignore_regex = "exited with status $i\\\.";
}
run("", "-exitval $i", $count, 0);
undef($ignore_regex);
}
}
#
# Hostname transformation.
#
sub transform_hostname_tests($)
{
my $count = shift @_;
run("--transform-hostname=s/hiya/hiya/", "", $count, 0);
run("--transform-hostname-program=cat", "", $count, 0);
}
#
# Test use of config file, once from a file and once from stdin.
# Builds the config file or stdin string, then calls run() to do
# the dirty work and verify the results.
#
sub config_tests(@)
{
my @testlist = @_;
for my $i (@testlist) {
my $s = sprintf("testc.%d.%02s", $$, $iter);
open(TMP, ">$s");
print TMP "# mpiexec config\n";
print TMP "-n $i : hello\n";
close(TMP);
run("-config $s", "", $i, 0);
}
for my $i (@testlist) {
$stdin_gets = "\"-n $i : hello\"";
run("-config=- -nostdin", "", $i, 0);
undef($stdin_gets);
}
# Test constraint handling too: more or fewer lines in config
# file than are available, with or without -np on command line.
# Just test max size in list.
my $i = $testlist[$#testlist];
my $j;
# config file specifies more than available, with and without -np
$j = $i + 1;
$stdin_gets = "\"-n $j : hello\"";
run("-config=- -nostdin", "", $i, 0);
run("-config=- -nostdin -np $i", "", $i, 0);
# config file specifies fewer than available, with and without -np
if ($i > 1) {
$j = $i - 1;
$stdin_gets = "\"-n $j : hello\"";
run("-config=- -nostdin", "", $j, 0);
# should error, asked for $i but config file only gave $i-1
$ignore_regex = "(specifies $i processors|only matched $j\\\.)";
run("-config=- -nostdin -np $i", "", 0, 0);
undef($ignore_regex);
}
# done
undef($stdin_gets);
}
#
# Test each shell, make sure the "cd" in the mpiexec startup line works,
# and test passing a single quote.
#
# Some shells are just unsuitable for use non-interactively.
# In particular, csh and tcsh here complain about "no access
# to tty (Bad file descriptor)", but work anyway.
#
sub shell_test()
{
$nodes = 1;
my $count = $nodes * $smpsize;
my @shells;
print "Testing the various shells on your system.\n";
if (open(SHELLS, "/etc/shells")) {
while (<SHELLS>) {
chomp;
if (-x $_) {
push @shells, $_;
}
}
close(SHELLS);
} else {
# reasonable defaults?
for ( "/bin/sh", "/bin/csh" ) {
if (-x $_) {
push @shells, $_;
}
}
}
# modify global $shell for each test
for $shell (@shells) {
print "shell is $shell\n";
# these escapes in perl yield "don\'doit" in file
run("", "don\\\'tdoit", $count, 0);
}
}
#
# main()
#
$| = 1; # see output immediately
if (not -x "hello") {
print "No executable \"hello\". You might \"make hello\" first.\n";
exit(1);
}
# Common user error: PBS is not happy running jobs for root.
if ($EUID == 0) {
print "PBS will not run jobs for root.",
" Run this script as a normal user.\n";
exit(1);
}
# what shell
if (defined($fixed_shell) && $fixed_shell =~ /./) {
$shell = $fixed_shell;
} else {
$shell = ""; # use user default
}
# test max nodes and just one node
if ($use_ncpus > 0) {
$available_nodes = 1; # no multi-node for time-shared
}
@nodeset = ($available_nodes);
if ($available_nodes > 1) { # test nodes=1 case too
push @nodeset, 1;
}
foreach $nodes (@nodeset) {
print "Testing $nodes node", ($nodes > 1 ? "s" : ""),
" with SMP size $smpsize.\n";
# total number of processes
my $count = $nodes * $smpsize;
#
# Only test some subset of all possible combinations. Just beyond
# the SMP size, then the full size.
#
my @testlist;
my $max = $smpsize + 1;
if ($max > $count) {
@testlist = (1..$count);
} else {
@testlist = (1..$max,$count);
}
numproc_tests(@testlist);
pernode_nolocal_tests();
stdio_tests($count);
segv_tests($count, @testlist);
abort_tests($count, @testlist);
exitval_tests($count);
transform_hostname_tests($count);
config_tests(@testlist);
}
if (!defined($fixed_shell)) {
shell_test();
}
exit 0;
syntax highlighted by Code2HTML, v. 0.9.1