diff -ruN --exclude='.nfs*' torque-1.1.0p0/README.mpiexec torque-1.1.0p0-bp/README.mpiexec
--- torque-1.1.0p0/README.mpiexec	1970-01-01 10:00:00.000000000 +1000
+++ torque-1.1.0p0-bp/README.mpiexec	2004-08-19 16:41:56.000000000 +1000
@@ -0,0 +1,24 @@
+
+Documentation of changes applied as part of the mpiexec patch.
+http://www.osc.edu/~pw/mpiexec/
+Last changed 04 Dec 2001.
+
+
+General bug fix:
+
+    src/lib/Libifl/tm.c
+
+	Do not call DIS_tcp_setup(-1) if connection to the local mom failed
+	after 5 tries.
+
+Functionality additions:
+
+    src/resmom/start_exec.c
+
+	Added extension to get stdin/out/err files from the environment rather
+	than always using /dev/null during tm_spawn.
+
+    src/cmds/pbs_demux.c
+
+	Never expect a PBS_JOBCOOKIE to be delivered from a process.
+
diff -ruN --exclude='.nfs*' torque-1.1.0p0/src/cmds/pbs_demux.c torque-1.1.0p0-bp/src/cmds/pbs_demux.c
--- torque-1.1.0p0/src/cmds/pbs_demux.c	2004-06-03 04:54:43.000000000 +1000
+++ torque-1.1.0p0-bp/src/cmds/pbs_demux.c	2004-08-19 16:41:56.000000000 +1000
@@ -106,10 +106,8 @@
 struct routem {
 	enum rwhere	r_where;
 	short		r_nl;
-	short		r_first;
 };
 fd_set readset;
-char   *cookie = 0;
 
 
 void readit(int sock, struct routem *prm)
@@ -127,18 +125,6 @@
 
 	i = 0;
 	if ((amt = read(sock, buf, 256)) > 0) {
-	    if (prm->r_first == 1) {
-
-		/* first data on connection must be the cookie to validate it */
-
-		i = strlen(cookie);
-		if (strncmp(buf, cookie, i) != 0) {
-		    (void)close(sock);
-		    prm->r_where = invalid;
-		    FD_CLR(sock, &readset);
-		}
-		prm->r_first = 0;
-	    }
 	    for (pc = buf+i; pc < buf+amt; ++pc) {
 #ifdef DEBUG
 		if (prm->r_nl) {
@@ -176,22 +162,12 @@
 
 
 	parent = getppid();
-	cookie = getenv("PBS_JOBCOOKIE");
-	if (cookie == 0) {
-		fprintf(stderr, "%s: no PBS_JOBCOOKIE found in the env\n",
-			argv[0]);
-		exit(3);
-	}
-#ifdef DEBUG
-	printf("Cookie found in environment: %s\n", cookie);
-#endif
 
 	maxfd = sysconf(_SC_OPEN_MAX);
 	routem = (struct routem *)malloc(maxfd*sizeof(struct routem));
 	for (i=0; i<maxfd; ++i) {
 		(routem+i)->r_where = invalid;
 		(routem+i)->r_nl    = 1;
-		(routem+i)->r_first = 0;
 	}
 	(routem+main_sock_out)->r_where = new_out;
 	(routem+main_sock_err)->r_where = new_err;
@@ -244,7 +220,6 @@
 				newsock = accept(i, 0, 0);
 				(routem+newsock)->r_where =  (routem+i)->r_where== new_out ? old_out : old_err;
 				FD_SET(newsock, &readset);
-				(routem+newsock)->r_first = 1;
 				break;
 			case old_out:
 			case old_err:
diff -ruN --exclude='.nfs*' torque-1.1.0p0/src/lib/Libifl/tm.c torque-1.1.0p0-bp/src/lib/Libifl/tm.c
--- torque-1.1.0p0/src/lib/Libifl/tm.c	2004-06-03 04:54:44.000000000 +1000
+++ torque-1.1.0p0-bp/src/lib/Libifl/tm.c	2004-08-19 16:41:56.000000000 +1000
@@ -458,7 +458,8 @@
 		}
 
 	}
-	DIS_tcp_setup(local_conn);
+	if (local_conn >= 0)
+	    DIS_tcp_setup(local_conn);
 	return (local_conn);
 }
 
diff -ruN --exclude='.nfs*' torque-1.1.0p0/src/resmom/start_exec.c torque-1.1.0p0-bp/src/resmom/start_exec.c
--- torque-1.1.0p0/src/resmom/start_exec.c	2004-06-03 04:54:44.000000000 +1000
+++ torque-1.1.0p0-bp/src/resmom/start_exec.c	2004-08-19 17:12:48.000000000 +1000
@@ -1858,6 +1858,42 @@
 
 
 /*
+ * Look for a certain environment variable which has a port# which should
+ * be opened on the MS to establish communication for one of the 3 stdio
+ * streams.  >=0 return is that valid fd, -1 means no env var found,
+ * -2 means malformed env value or failure to connect.
+ */
+static int
+search_env_and_open(const char *envname, u_long ipaddr)
+{
+    static char *id = "search_env_and_open";
+    int i, len = strlen(envname);
+
+    for (i=0; i<vtable.v_used; i++)
+	if (!strncmp(vtable.v_envp[i], envname, len)) {
+	    const char *cp = vtable.v_envp[i] + len;
+	    char *cq;
+	    int fd, port;
+	    if (*cp++ != '=') break;  /* empty, ignore it */
+	    port = strtol(cp, &cq, 10);
+	    if (*cq) {
+		log_err(errno, id, "improper value for MPIEXEC_STD*_PORT");
+		return -2;
+	    }
+#if 0       /* debugging */
+	    log_err(0, "search_env_and_open attempting open", vtable.v_envp[i]);
+#endif
+	    if ((fd = open_demux(ipaddr, port)) < 0) {
+		log_err(errno, id, "failed connect to mpiexec process on MS");
+		log_err(errno, id, vtable.v_envp[i]);
+		return -2;
+	    }
+	    return fd;
+	}
+    return -1;  /* not found */
+}
+
+/*
 ** Start a process for a spawn request.  This will be different from
 ** a job's initial shell task in that the environment will be specified
 ** and no interactive code need be included.
@@ -1878,7 +1914,7 @@
   int	pipes[2], kid_read, kid_write, parent_read, parent_write;
   int	pts;
   int	i, j;
-  int	fd;
+  int	fd0, fd1, fd2;
   u_long	ipaddr;
   struct	array_strings	*vstrs;
   struct  startjob_rtn sjr;
@@ -2263,7 +2299,11 @@
   ** Set up stdin.
   */
 
-  if ((fd = open("/dev/null", O_RDONLY)) == -1) 
+  /* look through env for a port# on MS we should use for stdin */
+  if ((fd0 = search_env_and_open("MPIEXEC_STDIN_PORT", ipaddr)) == -2)
+    starter_return(kid_write, kid_read, JOB_EXEC_FAIL2, &sjr);
+  /* use /dev/null if no env var found */
+  if (fd0 < 0 && (fd0 = open("/dev/null", O_RDONLY)) == -1)
     {
     log_err(errno,"newtask","could not open dev/null");
 
@@ -2271,19 +2311,24 @@
     }
   else 
     {
-    dup2(fd,0);
-
-    if (fd > 0)
-      close(fd);
+      (void)dup2(fd0, 0);
+      if (fd0 > 0)
+        (void)close(fd0);
     }
 
+  /* look through env for a port# on MS we should use for stdout/err */
+  if ((fd1 = search_env_and_open("MPIEXEC_STDOUT_PORT", ipaddr)) == -2)
+      starter_return(kid_write, kid_read, JOB_EXEC_FAIL2, &sjr);
+  if ((fd2 = search_env_and_open("MPIEXEC_STDERR_PORT", ipaddr)) == -2)
+      starter_return(kid_write, kid_read, JOB_EXEC_FAIL2, &sjr);
+
   if (pjob->ji_numnodes > 1) 
     {
     /*
     ** Open sockets to demux proc for stdout and stderr.
     */
 
-    if ((fd = open_demux(ipaddr,pjob->ji_stdout)) == -1)
+    if (fd1 < 0 && (fd1 = open_demux(ipaddr,pjob->ji_stdout)) == -1)
       {
       starter_return(kid_write,kid_read,JOB_EXEC_FAIL2,&sjr);
   
@@ -2292,12 +2337,12 @@
       exit(1);
       }
 
-    dup2(fd,1);
+    dup2(fd1,1);
 
-    if (fd > 1)
-      close(fd);
+    if (fd1 > 1)
+      close(fd1);
 
-    if ((fd = open_demux(ipaddr,pjob->ji_stderr)) == -1)
+    if (fd2 < 0 && (fd2 = open_demux(ipaddr,pjob->ji_stderr)) == -1)
       {
       starter_return(kid_write,kid_read,JOB_EXEC_FAIL2,&sjr);
 
@@ -2306,44 +2351,70 @@
       exit(1);
       }
 
-    dup2(fd,2);
+    dup2(fd2,2);
 
-    if (fd > 2)
-      close(fd);
+    if (fd2 > 2)
+      close(fd2);
 	
-    write(1,pjob->ji_wattr[(int)JOB_ATR_Cookie].at_val.at_str,
-      strlen(pjob->ji_wattr[(int)JOB_ATR_Cookie].at_val.at_str));
-
-    write(2,pjob->ji_wattr[(int)JOB_ATR_Cookie].at_val.at_str,
-      strlen(pjob->ji_wattr[(int)JOB_ATR_Cookie].at_val.at_str));
     } 
   else if ((pjob->ji_wattr[(int)JOB_ATR_interactive].at_flags&ATR_VFLAG_SET) &&
            (pjob->ji_wattr[(int)JOB_ATR_interactive].at_val.at_long > 0)) 
     {
     /* interactive job, single node, write to pty */
 
-    if ((pts = open_pty(pjob)) < 0) 
-      {
-      log_err(errno,id,"cannot open slave");
+    pts = -1;
+    if (fd1 < 0 || fd2 < 0) {
+      if ((pts = open_pty(pjob)) < 0) 
+        {
+        log_err(errno,id,"cannot open slave");
 
-      starter_return(kid_write,kid_read,JOB_EXEC_FAIL1,&sjr);
-      }
+        starter_return(kid_write,kid_read,JOB_EXEC_FAIL1,&sjr);
+        }
+
+      if (fd1 < 0)
+        fd1 = pts;
+      if (fd2 < 0)
+        fd2 = pts;
 
-    dup2(pts,1);
-    dup2(pts,2);
+    }
+
+    (void)dup2(fd1, 1);
+    (void)dup2(fd2, 2);
+    if (fd1 != pts)
+      (void) close(fd1);
+    if (fd2 != pts)
+      (void) close(fd2);
     }
   else 
     {
     /* normal batch job, single node, write straight to files */
 
-    if (open_std_out_err(pjob) == -1) 
-      {
-      log_err(errno,id,"cannot open stderr/stdout");
+    pts = -1;
+    if (fd1 < 0 || fd2 < 0) {
 
-      starter_return(kid_write, kid_read,JOB_EXEC_FAIL1,&sjr);
-      }
+      if (open_std_out_err(pjob) == -1) 
+        {
+        log_err(errno,id,"cannot open stderr/stdout");
+
+        starter_return(kid_write, kid_read,JOB_EXEC_FAIL1,&sjr);
+        }
     }
 
+    if (fd1 >= 0) {
+      (void) close(1);
+      (void) dup2(fd1, 1);
+      if (fd1 > 1)
+        (void) close(fd1);
+    }
+    if (fd2 >= 0) {
+      (void) close(2);
+      (void) dup2(fd2, 2);
+      if (fd2 > 2)
+        (void) close(fd2);
+    }
+
+  }
+
   log_close(0);
 
   starter_return(


syntax highlighted by Code2HTML, v. 0.9.1