/*
 * task.c - manage tasks for other clients, not myself
 *
 * $Id: task.c 388 2006-11-27 17:09:48Z pw $
 *
 * Copyright (C) 2005-6 Pete Wyckoff
 *
 * Distributed under the GNU Public License Version 2 or later (See LICENSE)
 */
/*
 * NOTE(review): the header names of the first five includes below are
 * missing from this copy of the file; it looks like everything inside
 * angle brackets was stripped.  Restore them from the upstream source.
 */
#include
#include
#include
#include
#include
#include "mpiexec.h"

/*
 * Global variable so anybody can walk it.  The list head itself is
 * file-static; other files reach it through the exported pointer "tids".
 */
static LIST_HEAD(tids_list);
struct list_head *tids = &tids_list;

/*
 * Create a new tid entry for the given (tid, client, task) triple and
 * append it to the tail of the tids list.  The status field starts at 0.
 * Returns the newly allocated entry; it is released again by tid_del().
 */
tids_t *
tid_add(int tid, int client, int task)
{
    tids_t *tp;

    tp = Malloc(sizeof(*tp));
    tp->tid = tid;
    tp->client = client;
    tp->task = task;
    tp->status = 0;
    list_add_tail(&tp->list, tids);
    return tp;
}

/*
 * Lookup the tid entry for a given client's task number.
 * Returns the first entry on the tids list whose client and task both
 * match, or NULL if there is none.
 */
extern tids_t *
tid_find(int client, int task)
{
    tids_t *tp;

    list_for_each_entry(tp, tids, list)
        if (tp->client == client && tp->task == task)
            return tp;
    return NULL;
}

/*
 * Forget about a tid and retire all events associated with it.
 * Events on the evts list with the same client and task are only flagged
 * dead here, not unlinked.  The tid entry itself is unlinked and freed,
 * so tp must not be used after this call.
 */
void
tid_del(tids_t *tp)
{
    evts_t *ep;

    list_for_each_entry(ep, evts, list)
        if (ep->client == tp->client && ep->task == tp->task)
            ep->dead = 1;
    list_del(&tp->list);
    free(tp);
}

/*
 * Debugging: print the function name followed by one line per entry on
 * the tids list (tid, client, task, status) to stdout.
 */
void
tid_dump(void)
{
    tids_t *tp;

    printf("%s\n", __func__);
    list_for_each_entry(tp, tids, list)
        printf("tid %d client %d task %d status %d\n", tp->tid, tp->client,
          tp->task, tp->status);
}

/*
 * For debugging printfs, find the node name for a given tm_node_id.
 *
 * NOTE(review): this copy of the file is damaged from the "for" header
 * below onwards.  The rest of node_name_from_nid() and the beginning of
 * the next function are missing: tp, ret and evt are used below without
 * any visible declaration.  What follows the gap is the tail of a routine
 * that kills a single task through tm_kill().  Restore the missing text
 * from the upstream source before building.
 */
const char *
node_name_from_nid(tm_node_id nid)
{
    int i, j;

    for (i=0; iclient, tp->task);
    /* ask tm to deliver SIGKILL to this task; evt receives the event id */
    ret = tm_kill(tp->tid, SIGKILL, &evt);
    if (ret == TM_SUCCESS)
        /* remember the pending kill event for this client/task */
        evt_add(evt, tp->client, tp->task, EVT_KILL);
    else if (ret == TM_ENOTFOUND) {
        /* tm does not know the task any more: just drop our entry */
        debug(2, "%s: delete already dead client %d task %d", __func__,
          tp->client, tp->task);
        tid_del(tp);
    } else
        error_tm(ret, "%s: tm_kill client %d task %d", __func__,
          tp->client, tp->task);
    /*
     * NOTE(review): evt is returned on every path, including the ones
     * where tm_kill did not succeed; whether it holds a defined value
     * there depends on the missing declaration - confirm.
     */
    return evt;
}

/*
 * Set once to true if somebody's early exit caused all the others to
 * be killed.
 */
int have_killed = 0;

/*
 * Use tm to send a signal to all tasks. 
*/
/*
 * NOTE(review): this copy of the file is damaged.  Only the start of
 * kill_tasks() survives (the debug message and a truncated "for" header);
 * the use of signum is not visible.  The code after the gap is the tail
 * of a different routine, an event-wait loop whose beginning is missing:
 * done, ep, numspawned, last_numspawned and len are used without any
 * visible declaration, and the braces below do not balance as shown.
 * Indentation here is a best guess at the original nesting.  Restore the
 * missing text from the upstream source before building.
 */
void
kill_tasks(int signum)
{
    int i;

    debug(1, "%s: killing all tasks", __func__);
    for (i=0; iclient == -1) {
                /* self, any task */
                done = 0;
                break;
            }
        }
    }
    /* done was not cleared above: leave the (not visible) enclosing loop */
    if (done)
        break;

    if (cl_args->verbose) {
        /*
         * Progress report on stderr, printed only when the number of
         * spawned tasks has changed since the last report.
         */
        if (numspawned > 0 && last_numspawned != numspawned) {
            int i, chars, more;
            /* presumably the line width to stay within - TODO confirm */
            const int width = 80;
            /* presumably room kept for the " and N others" suffix */
            const int reserve = 18;

            last_numspawned = numspawned;
            fprintf(stderr, "%s: %s: waiting for", progname, __func__);
            /* NOTE(review): 32 looks like an estimate of the prefix width */
            chars = 32;
            more = 0;
            /*
             * NOTE(review): loop header and the computation of len are
             * truncated here.  Visible behavior: a node name is either
             * printed and its len added to chars, or counted in more.
             */
            for (i=0; i width - reserve) {
                    ++more;
                } else {
                    fprintf(stderr, " %s", nodes[tasks[i].node].name);
                    chars += len;
                }
            }
            /* summarize the names that were counted but not printed */
            if (more)
                fprintf(stderr, " and %d others", more);
            fprintf(stderr, ".\n");
        }
        if (numspawned == 0)
            debug(2, "%s: tasks done, but waiting on events", __func__);
    }

    /* look for and handle an event */
    if (concurrent_master) {
        /* dispatch events until poll_event() has nothing more to return */
        for (;;) {
            ep = poll_event();
            if (!ep)
                break;
            dispatch_event(ep);
        }
        cm_check_clients();  /* includes timeout */
    } else {
        /* dispatch at most one event from block_event() */
        ep = block_event();
        if (ep)
            dispatch_event(ep);
    }
    }
}