#!/usr/local/bin/perl -w
# Copyright (c) 2001-2004 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 1998-2001 University of Notre Dame.
# All rights reserved.
# Copyright (c) 1994-1998 The Ohio State University.
# All rights reserved.
#
# This file is part of the LAM/MPI software package. For license
# information, see the LICENSE file in the top level directory of the
# LAM/MPI source distribution.
#
# $HEADER$
#
# Function: - start an MPI application
#
use strict;
use File::Temp qw(tempfile tempdir);
use Data::Dumper;
use POSIX;
# Global variables
my $host_map;
my $as_fh;
my $as_fn = "";
my $host_arch_hash_created = 0;
my $mpirun_arg = "";
my %mpiexec_arg;
my $verbose_mode = "";
my $boot_flag = "-H";
my $debug_mode = "";
my $tv_mode = "";
my $ssi_args = "";
my $configfile = "";
my $lamboot_on = 0;
my $prefix_boot = 0;
my $prefix_path = "";
my $prefix_bin = "";
my $prefix_arg = "";
my $machinefile = "";
my $bootargs = "";
my $arg_index = 0;
my @local_args;
my $current_state = "";
my $booted_lam = 0;
my $pid;
my $child_waitpid_status;
my $lamhalt_timeout = 10; # If lamhalt does not complete in 10 secs we
# interrupt it and use lamwipe to lamhalt
# Specify signal handler for SIGINT, SIGTERM and SIGHUP
$SIG{INT} = \&sig_handler_INT;
$SIG{TERM} = \&sig_handler_TERM;
$SIG{HUP} = \&sig_handler_HUP;
# Parse global args
parse_global_args();
if ( $debug_mode ne "" ) {
print "mpiexec: Global argument parsing done\n";
}
# lamboot if -machinefile or -boot specified
# -machinefile specifies the host file to be passed to lamboot
# -boot indicates that we should lamboot however if -machinefile is
# not specified then no host file is passed to lamboot
if ( $lamboot_on == 1 ) {
# Cannot use system() as it does not propogate the signals to child
# process
# Cannot use backtics cause even though it propogates signal to child
# process we get runaway lamboot not sure why..
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: Booting lam..\n";
}
$current_state = "lamboot";
$booted_lam = 1;
# Subroutine for fork-exec-waitpid (few) to fork-exec lamboot
few();
$current_state = "";
undef ($pid);
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: Lamboot Complete\n";
}
}
# Create hostname => node_no hash
# This will print error and exit if "lamnodes"fail's
# So when user had not lambooted before executing mpiexec and
# has not specified -machinefile then lamnodes will fail and we will
# print error and exit
create_host_node_hash();
if ( $debug_mode ne "" ) {
print "mpiexec: Host-Node Number hash created\n";
}
# Create temp file for appschema (UNLINK makes sure file is deleted on exit
my $tmpdir;
if (exists($ENV{TMPDIR})) {
$tmpdir = $ENV{TMPDIR};
} else {
$tmpdir = "/tmp";
}
($as_fh, $as_fn) = tempfile($tmpdir . "/lam_appschema_XXXXXX");
if ( $debug_mode ne "" ) {
print "mpiexec: Temporary file $as_fn created " .
"(will be used as app schema file for mpirun)\n";
}
# Parse non global command line args
if ($configfile ne "") {
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: Parsing config file $configfile \n";
}
parse_file($configfile);
} else {
@local_args = @ARGV[$arg_index .. $#ARGV];
parse_arg(@local_args);
}
close($as_fh) ||
mpiexec_die("Cannot close temp appshema file $as_fn!!!! $! \n");
# Execute mpirun
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: Launching MPI programs\n";
}
$current_state = "mpirun";
# Subroutine for fork-exec-waitpid (few) to fork-exec mpirun
few();
$current_state = "";
undef ($pid);
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: MPI program execution over..\n";
}
# Delete temp app cshema file we had created
if ( $debug_mode ne "" ) {
print "mpiexec: deleting temprory file $as_fn\n";
}
unlink($as_fn) ||
mpiexec_die("Cannot delete temp appshema file $as_fn!!!! $! \n");
$as_fn = "";
# lamhalt if we lambooted from within mpiexec
if ( $booted_lam == 1 ) {
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: Performing Lamhalt\n";
}
# Our own function which takes care of Ignoring signals SIGINT,
# SIGTERM and SIGHUP during lamhalt. Also has a timeout mechanism
# where in if lamhalt does not complete within specified number of
# seconds it interrupts lamhalt and runs lamwipe to cleanup LAM RTE
my_lamhalt();
}
exit(0);
######################################################################
## Subroutines from here down
######################################################################
# Our die function
sub mpiexec_die {
if ( $debug_mode ne "" ) {
print "mpiexec: mpiexec_die called\n";
}
# Delete temp app cshema file we had created
if ( $as_fn ne "" ) {
if ( $debug_mode ne "" ) {
print "mpiexec: deleting temprory file $as_fn\n";
}
unlink($as_fn);
$as_fn = "";
}
# lamhalt if we lambooted from within mpiexec
if ( $booted_lam == 1 ) {
# Our own function which takes care of Ignoring signals SIGINT,
# SIGTERM and SIGHUP during lamhalt. Also has a timeout mechanism
# where in if lamhalt does not complete within specified number of
# seconds it interrupts lamhalt and runs lamwipe to cleanup LAM RTE
my_lamhalt();
}
die(@_);
}
# Signal Handlers
# SIGINT signal handler
sub sig_handler_INT {
# Reinstall sighandler
$SIG{INT} = \&sig_handler_INT;
if ( $debug_mode ne "" ) {
print "mpiexec: inside SIGINT handler\n";
}
# parent forwards the signal to fork-exec'ed lamboot process
if ( ( $current_state ne "" ) && ( $pid ) ) {
print "sending sig_INT to $current_state \n";
kill INT => $pid;
waitpid($pid, 0);
# Handle waitpid status
$child_waitpid_status = $?;
handle_waitpid_status( $child_waitpid_status, $current_state );
}
# call generic signal handler
sig_handler();
}
# SIGTERM signal handler
sub sig_handler_TERM {
# Reinstall sighandler
$SIG{TERM} = \&sig_handler_TERM;
if ( $debug_mode ne "" ) {
print "mpiexec: inside SIGTERM handler\n";
}
# parent forwards the signal to fork-exec'ed lamboot process
if ( ( $current_state ne "" ) && ( $pid ) ) {
kill TERM => $pid;
waitpid($pid, 0);
# Handle waitpid status
$child_waitpid_status = $?;
handle_waitpid_status( $child_waitpid_status, $current_state );
}
# call generic signal handler
sig_handler();
}
# SIGHUP signal handler
sub sig_handler_HUP {
# Reinstall sighandler
$SIG{HUP} = \&sig_handler_HUP;
if ( $debug_mode ne "" ) {
print "mpiexec: inside SIGHUP handler\n";
}
# parent forwards the signal to fork-exec'ed lamboot process
if ( ( $current_state ne "" ) && ( $pid ) ) {
kill HUP => $pid;
waitpid($pid, 0);
# Handle waitpid status
handle_waitpid_status( $?, $current_state );
}
# call generic signal handler
sig_handler();
}
# SIGALRM signal handler
sub sig_handler_ALRM {
# Reinstall sighandler
$SIG{ALRM} = \&sig_handler_ALRM;
if ( $debug_mode ne "" ) {
print "mpiexec: inside SIGALRM handler\n";
}
die "timeout\n";
};
# generic signal handler
sub sig_handler {
# MRC: This stuff will change !!!!!!
# We might just get rid of sig_handler and have sig_handler_*
# just call mpiexec_die
mpiexec_die("\n");
}
# Handle waitpid status
sub handle_waitpid_status {
my $waitpid_status = $_[0];
my $wait_func = $_[1];
my $p_exit_status;
if ( $debug_mode ne "" ) {
print "mpiexec: Inside handle_waitpid_status " .
"Function: $wait_func, Error Status: $waitpid_status\n";
}
if ( WIFEXITED($waitpid_status) ) {
if ( ( $p_exit_status = WEXITSTATUS($waitpid_status) ) > 0 ) {
mpiexec_die "$wait_func failed with exit status $p_exit_status\n";
}
}
if ( WIFSIGNALED($waitpid_status) ) {
mpiexec_die "$wait_func was killed by signal " .
WTERMSIG($waitpid_status) . "\n";
}
}
# Parse global args
sub parse_global_args {
# Check for the bozo case of no command line arguments
if ($#ARGV == -1) {
show_help();
exit(0);
}
# look for -test flag as first arg .. this is internal flag used by
# lam configure script to find if appropriate version of Perl is
# available
if ($ARGV[0] eq "-test") {
exit(0);
}
# look for global args -v, -d, -machinefile, -boot, -boot-args
# and, -configfile
while ( defined($ARGV[$arg_index]) ) {
if ($ARGV[$arg_index] eq "-v") {
$verbose_mode = "-v";
$boot_flag = "";
} elsif ($ARGV[$arg_index] eq "-d") {
$debug_mode = "-d";
$boot_flag = "";
} elsif ($ARGV[$arg_index] eq "-configfile") {
$configfile = $ARGV[++$arg_index];
} elsif ($ARGV[$arg_index] eq "-machinefile") {
$machinefile = $ARGV[++$arg_index];
$lamboot_on = 1;
} elsif ($ARGV[$arg_index] eq "-boot") {
$lamboot_on = 1;
} elsif ($ARGV[$arg_index] eq "-prefix") {
$prefix_boot = 1;
$prefix_path = $ARGV[++$arg_index];
$prefix_bin = "$prefix_path/bin/";
} elsif ($ARGV[$arg_index] eq "-boot-args") {
$bootargs = $ARGV[++$arg_index];
$lamboot_on = 1;
} elsif ($ARGV[$arg_index] eq "-tv") {
$tv_mode = "-tv";
} elsif ($ARGV[$arg_index] eq "-ssi") {
$ssi_args = "$ssi_args -ssi " . $ARGV[++$arg_index] . " " .
"\"" . $ARGV[++$arg_index] . "\"";
} elsif ($ARGV[$arg_index] eq "-h") {
show_help();
exit(0);
} else {
return;
}
$arg_index++;
}
}
# Create hostname => node_no hash
sub create_host_node_hash {
my $first = 1;
my $line;
my $node_number;
my $hostname;
open (NODELIST, $prefix_bin."lamnodes 2>&1 |") ||
mpiexec_die("Cannot run Lamnodes! $!\n",
"Check if you had booted lam before calling mpiexec ",
"else pass -machinefile to mpiexec\n");
while ($line = <NODELIST>) {
chomp($line);
if ( $first && ($line =~ /^-----------/) ) {
close(NODELIST);
mpiexec_die("Lamnodes Failed!\n",
"Check if you had booted lam before calling mpiexec ",
"else use -machinefile to pass host file to mpiexec",
"\n");
}
$first = 0;
($node_number, $hostname) = ($line =~ /^(.*?)\s+(.*?):/);
$host_map->{$hostname}->{"lamnode"} = $node_number;
}
close(NODELIST);
}
# Create hostname => archtype hash
sub create_host_arch_hash {
my $line;
my $hostname;
my $bogus;
my $archtype;
open (CMD, "lamexec N laminfo -arch -parsable -hostname |") ||
mpiexec_die("Cannot run lamexec! \n");
while ($line = <CMD>) {
chomp($line);
($hostname, $bogus, $archtype) = split(/:/, $line);
$host_map->{$hostname}->{"arch"} = $archtype;
}
close(CMD);
if ( $debug_mode ne "" ) {
print "mpiexec: Here is hostname => (Node Number, Architecture) " .
"mapping\n";
print Dumper($host_map);
}
$host_arch_hash_created = 1;
}
# Parse command line args
sub parse_arg {
my (@my_ARGV) = @_;
my $i;
my $arg;
my $unrecognized_arg = 0;
$i = 0;
$arg = $my_ARGV[$i];
while ($i <= $#my_ARGV) {
if ( (! $unrecognized_arg ) && ($arg eq "-n") ) {
mpiexec_die("mpiexec arg -n cannot be used twice within same ",
"context \n")
if ( defined($mpiexec_arg{"n"}) );
$mpiexec_arg{"n"} = $my_ARGV[++$i];
} elsif ( (! $unrecognized_arg ) && ($arg eq "-host") ) {
mpiexec_die("mpiexec arg -host cannot be used twice within same ",
"context \n")
if ( defined($mpiexec_arg{"host"}) );
$mpiexec_arg{"host"} = $my_ARGV[++$i];
} elsif ( (! $unrecognized_arg ) && ($arg eq "-arch") ) {
mpiexec_die("mpiexec arg -arch cannot be used twice within same ",
"context \n")
if ( defined($mpiexec_arg{"arch"}) );
$mpiexec_arg{"arch"} = $my_ARGV[++$i];
} elsif ( (! $unrecognized_arg ) && ($arg eq "-wdir") ) {
mpiexec_die("mpiexec arg -wdir cannot be used twice within same ",
"context \n")
if ( defined($mpiexec_arg{"wdir"}) );
$mpiexec_arg{"wdir"} = $my_ARGV[++$i];
} elsif ( (! $unrecognized_arg ) && ($arg eq "-soft") ) {
$mpiexec_arg{"soft"} = $my_ARGV[++$i];
} elsif ( (! $unrecognized_arg ) && ($arg eq "-path") ) {
$mpiexec_arg{"path"} = $my_ARGV[++$i];
} elsif ( (! $unrecognized_arg ) && ($arg eq "-file") ) {
$mpiexec_arg{"file"} = $my_ARGV[++$i];
} elsif ($arg eq ":") {
$arg = $my_ARGV[$i+1];
if (! defined($arg)) {
mpiexec_die("Error in parsing line :\n",
join(" ", @my_ARGV),
"\nYou cannot have : at end of line\n");
}
#write a line in appschema
app_schema_insert();
# Free up (empty) %mpiexec_arg and $mpirun_arg
undef %mpiexec_arg;
undef $mpirun_arg;
$mpirun_arg = "";
$unrecognized_arg = 0;
} else {
# arg not recognized by mpiexec... so assume its mpirun arg
$unrecognized_arg = 1;
$mpirun_arg = $mpirun_arg . $arg . " ";
}
$arg = $my_ARGV[++$i];
}
# write a line in appschema
app_schema_insert();
# Free up (empty) %mpiexec_arg and $mpirun_arg
undef %mpiexec_arg;
undef $mpirun_arg;
$mpirun_arg = "";
}
# Parse configfile
sub parse_file {
my $temp_line;
my @temp_argv;
my $line_continued = 0;
my $prev_line = "";
# Open config file for reading
open (CONF_FILE, "$_[0]") ||
mpiexec_die("Cannot open config file $_[0] : $!\n");
while ($temp_line = <CONF_FILE>) {
# ignore commented lines in configfile
if ($temp_line =~ /^\s*#/) {
next;
}
# If previous line is being continued..
if ($line_continued) {
$temp_line = $prev_line . $temp_line;
$prev_line = "";
$line_continued = 0;
}
# check if this line will be continued..
if ($temp_line =~ /\\$/) {
$temp_line =~ s/\\$//;
$prev_line = $temp_line;
chomp($prev_line);
$line_continued = 1;
next;
}
chomp($temp_line);
@temp_argv = split(/\s+/, $temp_line);
parse_arg(@temp_argv);
}
close (CONF_FILE) ||
mpiexec_die("Cannot close config file $_[0] : $!\n");
}
# Write a line in appschema file
sub app_schema_insert {
my $found = 0;
my $host;
# if -host was specified we need to convert it to <where> option
# for appschema
if ($mpiexec_arg{"host"}) {
if ($host_map->{$mpiexec_arg{"host"}}) {
print $as_fh $host_map->{$mpiexec_arg{"host"}}->{"lamnode"}
, " ";
} else {
mpiexec_die "Invalid hostname " . $mpiexec_arg{"host"} .
" specified\n";
}
}
# if -arch was specified we need to convert it to <where> option
# for appschema
if ($mpiexec_arg{"arch"}) {
if (! $host_arch_hash_created) {
create_host_arch_hash();
}
foreach $host ( keys( %{$host_map} ) ) {
if ($host_map->{$host}->{"arch"} =~ /$mpiexec_arg{"arch"}/) {
print $as_fh $host_map->{$host}->{"lamnode"} . " ";
$found = 1;
}
}
if (! $found) {
mpiexec_die("mpiexec didnt find any node with -arch = ",
$mpiexec_arg{"arch"}, " \n");
}
}
# if -n was specified we need to convert it to -np option for appschema
if ($mpiexec_arg{"n"}) {
print $as_fh "-np " . $mpiexec_arg{"n"} . " ";
}
# if -wdir was specified we need to convert it to -wd option for
# appschema
if ($mpiexec_arg{"wdir"}) {
print $as_fh "-wd " . $mpiexec_arg{"wdir"} . " ";
}
# if -file was specified we need to print warning that it was
# ignored
if ($mpiexec_arg{"file"}) {
print "Warning!!! \"-file " . $mpiexec_arg{"file"} .
"\" was ignored by mpiexec\n";
}
# stuff we don't handle yet
if ( $mpiexec_arg{"soft"} || $mpiexec_arg{"path"} ) {
mpiexec_die "mpiexec's -soft and -path arguments are not ",
"currently supported \n";
}
# Finish the line
print $as_fh $mpirun_arg . "\n";
}
# Our own function which takes care of Ignoring signals SIGINT,
# SIGTERM and SIGHUP during lamhalt. Also has a timeout mechanism
# where in if lamhalt does not complete within specified number of
# seconds it interrupts lamhalt and runs lamwipe to cleanup LAM RTE
sub my_lamhalt {
# ignore signals INT, TERM and HUP during lamhalt
$SIG{INT} = 'IGNORE';
$SIG{TERM} = 'IGNORE';
$SIG{HUP} = 'IGNORE';
$SIG{ALRM} = \&sig_handler_ALRM;
# We have to use eval here because on timeout sig_handler_ALRM
# gets invoked and calls die. eval forces output of die to go into
# $@ instead of exiting the program with that error message.. kind
# of try catch
eval {
alarm ($lamhalt_timeout);
system($prefix_bin."lamhalt $boot_flag $verbose_mode $debug_mode") &&
die("Cannot lamhalt !!!! $! \n");
alarm (0); # clear the still pending alarm
};
if ($@) {
if ($@ =~ /timeout/) {
# lamboot timed out.. so use lamwipe
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: Lamhalt operation timedout .. trying lamwipe\n";
}
system("lamwipe $verbose_mode $debug_mode $machinefile") &&
die("Cannot lamwipe !!!! $! \n");
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: lamwipe completed\n";
}
}
else {
# something else caused alarm
# propagate unexpected errors
alarm(0); # clear the still pending alarm
die "\n";
}
}
else {
# lamboot didn't time out
if ( ( $verbose_mode ne "" ) || ( $debug_mode ne "" ) ) {
print "mpiexec: Lamhalt complete\n";
}
}
$booted_lam = 0;
# reinstall signal handlers for signals INT, TERM and HUP
$SIG{INT} = \&sig_handler_INT;
$SIG{TERM} = \&sig_handler_TERM;
$SIG{HUP} = \&sig_handler_HUP;
}
# Subroutine for fork-exec-waitpid (few)
sub few {
if ($pid = fork) {
waitpid($pid, 0);
# check exit status of child process
$child_waitpid_status = $?;
handle_waitpid_status( $child_waitpid_status, $current_state );
} else {
die "Cannot fork: $!\n"
unless defined $pid;
# exec lamboot
if ( $current_state eq "lamboot" ) {
if ( $prefix_boot == 1) {
$prefix_arg = "-prefix $prefix_path";
}
if ( $verbose_mode ne "" ) {
print "Running: " . "lamboot $boot_flag $verbose_mode $debug_mode $bootargs $ssi_args $prefix_arg $machinefile";
}
exec("lamboot $boot_flag $verbose_mode $debug_mode $bootargs $prefix_arg $machinefile")
|| die "mpiexec cannot exec lamboot $machinefile: $!\n";
}
# exec mpirun
if ( $current_state eq "mpirun" ) {
if ( $verbose_mode ne "" ) {
print "Running: " . $prefix_bin."mpirun $ssi_args $verbose_mode $tv_mode $as_fn";
}
exec($prefix_bin."mpirun $ssi_args $verbose_mode $tv_mode $as_fn") ||
mpiexec_die("mpiexec cannot exec mpirun: $!\n");
}
}
}
# Help message
sub show_help {
print "-----------------------------------------------------------------------------
Synopsis: mpiexec [global_args] local_args1 [: local_args2 [...]]
mpiexec [global_args] -configfile <filename>
Description: Run MPI programs on LAM nodes.
Global arguments:
-h This message
-boot Boot the LAM universe before running
-boot-args <args> Pass <args> to the booting agent
-d Lots of debugging output
-machinefile <file> Use boot schema <filename> to boot the LAM universe
-prefix <lam/install/path>
Use the LAM installation specified in </lam/install/path/>
-ssi <key> <value> Set SSI parameter <key> to value <value>
-tv Launch MPI process under the TotalView debugger
-v Be verbose
Local arguments:
-n <numprocs> Specify how many processes to start
-host <hostname> Launch on a specific hostname
-arch <arch> Launch on a specific architecture type
-wdir <dir> Set working directory
<other_arguments> Passed back to the MPI executable
-----------------------------------------------------------------------------\n";
}
syntax highlighted by Code2HTML, v. 0.9.1