/*
 * event.c - handle events
 *
 * $Id: event.c 388 2006-11-27 17:09:48Z pw $
 *
 * Copyright (C) 2005-6 Pete Wyckoff <pw@osc.edu>
 *
 * Distributed under the GNU Public License Version 2 or later (See LICENSE)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>  /* memcpy */
#include <signal.h>
#include <unistd.h>
#include <errno.h>
#include "mpiexec.h"

/*
 * Head of the event list.  The list head object itself is private to this
 * file; the pointer "evts" is the externally visible handle, exported as a
 * global variable so anybody can walk the list.
 */
static LIST_HEAD(evts_list);
struct list_head *evts = &evts_list;

/*
 * Create a new event record from the given fields and append it to the
 * tail of the global event list.  New records start out not dead and with
 * obit_evt set to 0.
 *
 * A full linked list gives reasonable speed on insertion and deletion, at
 * the cost of extra pointer operations and storage.  An auto-growing array
 * cannot be used because TM is given pointers into these structs, so they
 * must remain stable.
 *
 * NOTE(review): the Malloc() result is used unchecked, as in the rest of
 * this file; presumably the wrapper deals with allocation failure itself.
 */
void
evt_add(int evt, int client, int task, evt_type_t type)
{
    evts_t *rec = Malloc(sizeof(*rec));

    /* payload */
    rec->evt = evt;
    rec->client = client;
    rec->task = task;
    rec->type = type;
    rec->dead = 0;
    rec->obit_evt = 0;

    /* linkage */
    INIT_LIST_HEAD(&rec->list);
    list_add_tail(&rec->list, evts);
}

/*
 * Search the global event list for the record whose evt field equals the
 * given value.  Returns the first such record in list order, or NULL if
 * there is none.
 */
evts_t *
evt_lookup(int evt)
{
    evts_t *cur;
    evts_t *match = NULL;

    list_for_each_entry(cur, evts, list) {
	if (cur->evt == evt) {
	    match = cur;
	    break;
	}
    }
    return match;
}

/*
 * Unlink an event record from the list it is on and release its storage.
 * The pointer must not be used by the caller afterwards.
 */
void
evt_del(evts_t *ep)
{
    struct list_head *link = &ep->list;

    list_del(link);
    free(ep);
}

/*
 * Map an event type to a short constant string for log messages.  Any
 * value other than the three known types yields a placeholder string.
 */
const char *
evt_type_string(evt_type_t type)
{
    if (type == EVT_START)
	return "start";
    if (type == EVT_OBIT)
	return "obit";
    if (type == EVT_KILL)
	return "kill";
    return "(unknown event type)";
}

void
evt_dump(void)
{
    evts_t *ep;

    printf("%s\n", __func__);
    list_for_each_entry(ep, evts, list)
	printf("evt %d client %d task %d type %s dead %d obit_evt %d\n",
	  ep->evt, ep->client, ep->task, evt_type_string(ep->type),
	  ep->dead, ep->obit_evt);
}


/*
 * SIGALRM handler.  It is armed by process_obit_event() so that it fires
 * after a delay (hence the use of alarm); per the original author it can
 * also be triggered by the stdio listener in response to an MPI_Abort.
 */
static void
kill_others_now(int sig ATTR_UNUSED)
{
    const int sigs[] = { SIGALRM };

    debug(1, "%s: alarm went off, killing all other tasks", __func__);

    /* hand SIGALRM back to SIG_DFL before doing the kill */
    handle_signals(sigs, list_count(sigs), SIG_DFL);

    /* kill rest of tasks (hard), but still wait for them to die */
    kill_tasks(SIGKILL);
}

/*
 * Handle the obit (exit notification) event of one of our own tasks:
 * record how the task finished, possibly schedule the killing of all the
 * other tasks, and decrement the count of spawned tasks.
 */
static void
process_obit_event(evts_t *ep)
{
    const int t = ep->task;

    debug(1, "%s: evt %d task %d on %s stat %d",
      __func__, ep->evt, t, nodes[tasks[t].node].name,
      *tasks[t].status);

    /* an exit before startup finished is recorded differently */
    tasks[t].done = startup_complete ? DONE_OK : DONE_STARTUP_INCOMPLETE;

    /*
     * Kill everybody else, once only, if either
     *   exit_status indicates non-normal exit (i.e. segv will cause all to
     *   be killed but exit(1) will not)
     * or
     *   --kill command-line argument was given.
     */
    if (!have_killed
      && (*tasks[t].status >= PBS_SIG_OFFSET || cl_args->kill_others)) {
	/* schedule to kill all others in a little while */
	const int sigs[] = { SIGALRM };

	handle_signals(sigs, list_count(sigs), kill_others_now);
	alarm(5);
	have_killed = 1;
    }

    --numspawned;
}

/*
 * Handle completion of a kill request for one of our tasks.  Nothing is
 * updated here beyond a debug message: the task is only accounted for as
 * finished when its obit event arrives.
 */
static void
process_kill_event(evts_t *ep)
{
    const int t = ep->task;

    /* mostly ignore it, still wait for the obit */
    debug(1, "%s: evt %d task %d on %s", __func__, ep->evt, t,
      nodes[tasks[t].node].name);
}

/*
 * Handle the start event of one of our tasks: one fewer task is waiting
 * to start, and an obit event must now be registered so that its exit
 * will be noticed.
 *
 * As concurrent master the obit is requested directly with tm_obit().
 * Otherwise the obit event number is taken from ep->obit_evt, where the
 * value -1 means no obit could be obtained.
 *
 * If the task is already gone (tm_obit reports TM_ENOTFOUND, or obit_evt
 * is -1), no obit will ever arrive, so the task is marked done here with
 * an exit status of -1 and the spawned count is decremented immediately.
 */
static void
process_start_event(evts_t *ep)
{
    int died = 0;

    debug(1, "%s: evt %d task %d on %s", __func__, ep->evt, ep->task,
      nodes[tasks[ep->task].node].name);

    --numtasks_waiting_start;

    /* ask for an obit */
    if (concurrent_master) {
	/* (master does not enter his own tasks into the tids array) */
	tm_event_t evt;
	int err = tm_obit(tasks[ep->task].tid, tasks[ep->task].status, &evt);

	if (err == TM_SUCCESS)
	    evt_add(evt, -1, ep->task, EVT_OBIT);
	else if (err == TM_ENOTFOUND)
	    died = 1;  /* died */
	else
	    /* report the actual TM error code returned by tm_obit */
	    error_tm(err, "%s: tm_obit master %d", __func__, ep->task);
    } else {
	if (ep->obit_evt == -1)
	    died = 1;  /* died */
	else
	    evt_add(ep->obit_evt, -1, ep->task, EVT_OBIT);
    }

    if (died) {
	/* mark done if obit failed due to task not found, and status
	 * field will be invalid */
	debug(1, "%s: task %d on %s too fast, no obit", __func__,
	  ep->task, nodes[tasks[ep->task].node].name);
	tasks[ep->task].done = DONE_NO_EXIT_STATUS;
	*tasks[ep->task].status = -1;
	--numspawned;
    }
}

/*
 * Work the effects of this event into the data structures, then delete
 * the event record.  The record is always freed, so the caller must not
 * touch ep after this returns.
 *
 * As concurrent master, an event marked dead is dropped and an event
 * with client >= 0 is passed to cm_forward_event() instead of being
 * processed locally.
 */
void
dispatch_event(evts_t *ep)
{
    int consumed = 0;

    /* perhaps ignore or send to a client */
    if (concurrent_master) {
	if (ep->dead) {
	    /* ignore, event for deleted tid */
	    debug(2, "%s: ignoring dead event %d client %d task %d type %s",
	      __func__, ep->evt, ep->client, ep->task,
	      evt_type_string(ep->type));
	    consumed = 1;
	} else if (ep->client >= 0) {
	    cm_forward_event(ep);
	    consumed = 1;
	}
    }

    if (!consumed) {
	switch (ep->type) {
	    case EVT_START:
		process_start_event(ep);
		break;
	    case EVT_OBIT:
		process_obit_event(ep);
		break;
	    case EVT_KILL:
		process_kill_event(ep);
		break;
	    default:
		error("%s: unknown event type %d", __func__, ep->type);
	}
    }

    evt_del(ep);
}

/*
 * Grab the next tm event and return its record, or NULL if there is
 * nothing to report.
 *
 * When not running as concurrent master the work is delegated to
 * concurrent_poll(), which receives the block flag.  As concurrent master
 * the block flag is not used: tm_poll() is always called non-blocking,
 * and if it produced no event the stdio listener pipe is checked instead.
 */
static evts_t *
poll_or_block_event(int block)
{
    evts_t *ep = NULL;
    tm_event_t evt;
    int remote_tm_error;
    int err;

    if (!concurrent_master)
	return concurrent_poll(block);

    /*
     * Never blocking here, must timeout to check other things.  Retry
     * the poll after reconnecting for as long as the connection to the
     * mom is reported lost.
     */
    for (;;) {
	err = tm_poll(TM_NULL_EVENT, &evt, 0, &remote_tm_error);
	if (err != TM_ENOTCONNECTED)
	    break;
	reconnect_to_mom();
    }

    if (err == TM_SUCCESS) {
	/* valid event, but perhaps did not finish correctly */
	if (evt != TM_NULL_EVENT && remote_tm_error != TM_SUCCESS) {
	    if (remote_tm_error == TM_ESYSTEM)
		/* issue warning, but look at event anyway */
		warning("%s: evt %d remote system error", __func__, evt);
	    else
		error_tm_or_pbs(remote_tm_error,
		  "%s: tm_poll remote %d", __func__, remote_tm_error);
	}
    } else if (err == TM_ENOTFOUND) {
	evt = TM_NULL_EVENT;  /* happens for -server when no tasks */
    } else {
	error_tm(err, "%s: tm_poll", __func__);
    }

    /* translate the event number into its record */
    if (evt != TM_NULL_EVENT) {
	ep = evt_lookup(evt);
	if (ep == NULL)
	    error("%s: no event structure for %d", __func__, evt);
    }

    /*
     * Check stdio listener.  Non-master equivalent of this code is
     * pushed down inside select() in concurrent_poll.
     */
    if (ep == NULL && pipe_with_stdio >= 0) {
	struct timeval tv = { 0, 0 };  /* zero timeout: just a poll */
	fd_set rfs;
	int n;

	FD_ZERO(&rfs);
	FD_SET(pipe_with_stdio, &rfs);
	n = select(pipe_with_stdio+1, &rfs, 0, 0, &tv);
	if (n > 0)
	    stdio_msg_parent_read();
	else if (n < 0 && errno != EINTR)
	    error_errno("%s: select", __func__);
    }

    return ep;
}

/* Fetch the next event with the block flag cleared. */
evts_t *
poll_event(void)
{
    return poll_or_block_event(0);
}
/* Fetch the next event with the block flag set. */
evts_t *
block_event(void)
{
    return poll_or_block_event(1);
}

/*
 * Drain pending events without blocking, dispatching each one, and stop
 * as soon as an OBIT belonging to this process itself (client == -1) has
 * been dispatched.  Returns 1 in that case, or 0 if the events ran out
 * without such an obit (including when there were no events at all).
 */
int
poll_events_until_obit(void)
{
    evts_t *next;

    for (next = poll_event(); next != NULL; next = poll_event()) {
	/* must be decided before dispatch, which frees the record */
	const int own_obit = (next->client == -1 && next->type == EVT_OBIT);

	dispatch_event(next);
	if (own_obit)
	    return 1;
    }
    return 0;
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */