/* * exedist.c - use FAST scalable executable distribution to move the exe to * the compute nodes * * Returns 0 on success and 1 in the event that the distribution could * not be handled. * * $Id: exedist.c 374 2006-06-29 17:35:19Z pw $ * * Copyright (C) 2005-6 Pete Wyckoff * Copyright (C) 2005 Dennis Dalessandro * * Distributed under the GNU Public License Version 2 or later (See LICENSE) */ #include #include #include #include #include #include #include #include #include #include #include "mpiexec.h" /* * Overall executable distribution using fast. * * Returns ret, which starts at 1 (failure); in the code visible here it is set to 0 only after wait_tasks() has returned. When HAVE_FAST_DIST is not enabled the whole body is compiled out and the function simply returns 1. * * Fails early if config_get_unique_executable() yields no executable or if stat_exe() rejects FAST_DIST_PATH. * * Side effects visible here: overwrites cl_args->which_stdin and cl_args->comm, and points the non-local tasks/numtasks at a freshly allocated array of local_numtasks entries. * * NOTE(review): tasks_save, numtasks_save and cl_args_save suggest the originals are saved and later restored, but that code is not visible in this copy of the file -- confirm against upstream. */ int distribute_executable(void) { int ret = 1; /* failure */ #if HAVE_FAST_DIST const char *fast_command = FAST_DIST_PATH; /* from configure */ int i; int numtasks_save; int local_numtasks; tasks_t *tasks_save; cl_args_t cl_args_save; config_spec_t cs, root_cs; growstr_t *g, *root_g; int temp_fd; char *file_template; int port_num; FILE *fp; const char *exec_to_dist; int *usenodes; exec_to_dist = config_get_unique_executable(); /* nothing has been allocated yet at this point, so the two checks below return directly instead of going through out: */ if (!exec_to_dist) return ret; if (!stat_exe(fast_command, 0)) return ret; /* analyze nodes: usenodes is a zero-filled array of numnodes ints */ usenodes = Malloc(numnodes * sizeof(*usenodes)); memset(usenodes, 0, numnodes * sizeof(*usenodes)); local_numtasks = 0; /* NOTE(review): the text from the following loop header up to "is;" appears to have been truncated in this copy of the file; whatever fills usenodes and local_numtasks and sets up tasks_save, file_template, port_num, g and cs is not visible -- restore it from the upstream source before relying on this function */ for (i=0; is; debug(1, "%s: arg string for non root: %s", __func__, g->s); /* and to the root node: its argument string carries port_num, the name of the node running tasks[0], exec_to_dist and file_template */ root_g = growstr_init(); growstr_printf(root_g, "-p %d -r %s -e %s -n %s", port_num, nodes[tasks[0].node].name, exec_to_dist, file_template); root_cs.args = root_g->s; debug(1, "%s: arg string for root: %s", __func__, root_g->s); /* build new tasks: set which_stdin to STDIN_NONE and comm to COMM_NONE, then replace tasks/numtasks with local_numtasks fresh entries */ cl_args->which_stdin = STDIN_NONE; cl_args->comm = COMM_NONE; tasks = Malloc(local_numtasks * sizeof(*tasks)); numtasks = local_numtasks; for (i=0; i < numtasks; i++) { tasks[i].num_copies = 1; tasks[i].done = DONE_NOT_STARTED; /* NOTE(review): status is dereferenced on an entry that was just Malloc'd, with no visible assignment to status itself -- confirm it is an array member or is initialized elsewhere */ *tasks[i].status = -1; /* * Slight race condition in that the root wants to actively connect * to some other nodes, but it will retry a bit. Put root last to * hope that there is a bit of delay in startup. 
 * The last slot therefore gets root_cs on the node of tasks_save[0]; every other slot i gets cs on the node of tasks_save[i+1]. */ if (i == numtasks - 1) { tasks[i].node = tasks_save[0].node; tasks[i].conf = &root_cs; } else { tasks[i].node = tasks_save[i+1].node; tasks[i].conf = &cs; } debug(1, "%s: task %d on %d", __func__, i, tasks[i].node); } /* spawn tasks */ start_tasks(); debug(1, "%s: tasks started", __func__); /* wait for them to exit */ wait_tasks(); /* make sure everyone finished successfully: ret is optimistically set to 0 before the check */ ret = 0; /* NOTE(review): the following loop is truncated in this copy of the file (text is missing between the loop header and "is));"), and h is not among the declarations visible above -- the per-task status check and any restore of the saved tasks/cl_args must be recovered from the upstream source */ for (i=0; is)); growstr_free(h); } out: /* common cleanup: unlink the path named by file_template, then free it and usenodes */ unlink(file_template); free(file_template); free(usenodes); #endif /* HAVE_FAST_DIST */ return ret; }