Exemple #1
0
static inline void 
mapping_add(int from, int to, long feid, long teid)
{
	tor_add_pair(from, to);
	evt_add(from,    feid);
	evt_add(to * -1, teid);
	assert(tor_get_to(from) == to);
	assert(tor_get_from(to) == from);
	assert(evt_torrent(feid) == from);
	assert(evt_torrent(teid) == (-1*to));
}
Exemple #2
0
static inline void 
mapping_add(int from, int to, long feid, long teid)
{
	LOCK();
	evt_add(from,    feid);
	assert(evt_torrent(feid) == from);
	UNLOCK();
}
Exemple #3
0
static inline void 
mapping_add(int from, int to, long feid, long teid)
{
	long tf, tt;

	LOCK();
	tor_add_pair(from, to, feid, teid);
	evt_add(from,    feid);
	evt_add(to * -1, teid);
	assert(tor_get_to(from, &tt) == to);
	assert(tor_get_from(to, &tf) == from);
	assert(evt_torrent(feid) == from);
	assert(evt_torrent(teid) == (-1*to));
	assert(tt == teid);
	assert(tf == feid);
	UNLOCK();
}
Exemple #4
0
void
cos_init(void *arg)
{
	int c, accept_fd, ret;
	long eid;
	char *init_str = cos_init_args(), *create_str;
	int lag, nthds, prio;
	
	cvect_init_static(&evts);
	cvect_init_static(&tor_from);
	cvect_init_static(&tor_to);
	lock_static_init(&sc_lock);
		
	sscanf(init_str, "%d:%d:%d", &lag, &nthds, &prio);
	printc("lag: %d, nthds:%d, prio:%d\n", lag, nthds, prio);
	create_str = strstr(init_str, "/");
	assert(create_str);

	eid = evt_get();
	ret = c = from_tsplit(cos_spd_id(), td_root, create_str, strlen(create_str), TOR_ALL, eid);
	if (ret <= td_root) BUG();
	accept_fd = c;
	evt_add(c, eid);

	/* event loop... */
	while (1) {
		struct tor_conn tc;
		int t;
		long evt;

		memset(&tc, 0, sizeof(struct tor_conn));
		evt = evt_wait_all();
		t   = evt_torrent(evt);

		if (t > 0) {
			tc.feid = evt;
			tc.from = t;
			if (t == accept_fd) {
				tc.to = 0;
				accept_new(accept_fd);
			} else {
				tc.to = tor_get_to(t, &tc.teid);
				assert(tc.to > 0);
				from_data_new(&tc);
			}
		} else {
			t *= -1;
			tc.teid = evt;
			tc.to   = t;
			tc.from = tor_get_from(t, &tc.feid);
			assert(tc.from > 0);
			to_data_new(&tc);
		}

		cos_mpd_update();
	}
}
Exemple #5
0
void cos_init(void *arg)
{
	int c, accept_fd, ret;
	long eid;

	cvect_init_static(&evts);
	cvect_init_static(&tor_from);
	cvect_init_static(&tor_to);
	
	eid = evt_get();
	c = net_create_tcp_connection(cos_spd_id(), cos_get_thd_id(), eid);
	if (c < 0) BUG();
	ret = net_bind(cos_spd_id(), c, 0, 200);
	if (ret < 0) BUG();
	ret = net_listen(cos_spd_id(), c, 255);
	if (ret < 0) BUG();
	accept_fd = c;
	evt_add(c, eid);

	while (1) {
		struct tor_conn tc;
		int t;
		long evt;

		memset(&tc, 0, sizeof(struct tor_conn));
		printc("waiting...\n");
		evt = evt_grp_wait(cos_spd_id());
		t   = evt_torrent(evt);

		if (t > 0) {
			tc.feid = evt;
			tc.from = t;
			if (t == accept_fd) {
				tc.to = 0;
				printc("accepting event.\n");
				accept_new(accept_fd);
			} else {
				tc.to = tor_get_to(t);
				assert(tc.to > 0);
				printc("data from net.\n");
				from_data_new(&tc);
			}
		} else {
			t *= -1;
			tc.teid = evt;
			tc.to   = t;
			tc.from = tor_get_from(t);
			assert(tc.from > 0);
			printc("data from torrent.\n");
			to_data_new(&tc);
		}

		cos_mpd_update();
	}
}
Exemple #6
0
void
cos_init(void *arg)
{
	int c, accept_fd, ret;
	long eid;
	char *init_str = cos_init_args();
	char __create_str[128];
	static volatile int first = 1, off = 0;
	int port;
	u64_t start, end;

	if (cos_get_thd_id() == pid_thd) {
		pid_process();
	}
	
	union sched_param sp;

	if (first) {
		first = 0;

		sp.c.type = SCHEDP_PRIO;
		sp.c.value = 10;
		pid_thd = sched_create_thd(cos_spd_id(), sp.v, 0, 0);

		init(init_str);
		return;
	}


	printc("Thread %d, port %d\n", cos_get_thd_id(), __port+off);	
	port = off++;
	port += __port;
	eid = evt_get();
	if (snprintf(__create_str, 128, create_str, port) < 0) BUG();
	ret = c = from_tsplit(cos_spd_id(), td_root, __create_str, strlen(__create_str), TOR_ALL, eid);
	if (ret <= td_root) BUG();
	accept_fd = c;
	printc("accept_fd %d (eid %d)\n", accept_fd, eid);
	evt_add(c, eid);

	/* event loop... */
	while (1) {
		int t;
		long evt;
		
		evt = evt_wait_all();
		t   = evt_torrent(evt);
		printc("an interrupt comes in (thd %d, evt %d t %d)\n",
		       cos_get_thd_id(), evt, t);
		accept_new(accept_fd);
		break;
	}
}
Exemple #7
0
/*
 * Start the tasks, with much stuff in the environment.  If concurrent
 * master, this could be on behalf of some other mpiexec to which we
 * will forward any event/error results.
 */
int
start_tasks(int spawn)
{
    int i, ret = 0;
    char *nargv[3];
    char pwd[PATH_MAX];
    char *cp;
    int conns[3];  /* expected connections to the stdio process */
    int master_port = 0;
    const char *user_shell;
    growstr_t *g;
    int gmpi_port[2];
    int pmi_fd;
    int task_start, task_end;
    const char *mpiexec_redir_helper_path;
    char *psm_uuid = NULL;
    int tv_port = 0;

    /* for looping from 0..numtasks in the case of MPI_Spawn */
    task_start = spawns[spawn].task_start;
    task_end = spawns[spawn].task_end;

    /*
     * Get the pwd.  Probably can trust libc not to overflow this,
     * but who knows.
     */
    if (!getcwd(pwd, sizeof(pwd)))
	error("%s: no current working directory", __func__);
    pwd[sizeof(pwd)-1] = '\0';

    /*
     * Eventually use the user's preferred shell.
     */
    if ((cp = getenv("SHELL")))
	user_shell = cp;
    else if (pswd->pw_shell)
	user_shell = pswd->pw_shell;
    else
	user_shell = "/bin/sh";  /* assume again */

    /*
     * Rewrite argv to go through user's shell, just like rsh.
     *   $SHELL, "-c", "cd <path>; exec <argv0> <argv1>..."
     * But to change the working dir and not frighten weak shells like tcsh,
     * we must detect that the dir actually exists on the far side before
     * committing to the cd.  Use /bin/sh for this task, hoping it exists
     * everywhere we'll be.  Then there's also a bit of quoting nightmare
     * to handle too.  So we'll end up with:
     * rsh node "/bin/sh -c 'if test -d $dir ; then cd $dir ; fi ; $SHELL -c
     *         \'exec argv0 argv1 ...\''"
     * but with argv* (including the executable, argv0) changed to replace
     * all occurrences of ' with '\''.
     */
    nargv[0] = strsave("/bin/sh");  /* assume this exists everywhere */
    nargv[1] = strsave("-c");

    /* exec_line constructed for each process */
    g = growstr_init();

    /*
     * Start stdio stream handler process, if anybody gets stdin,
     * or !nostdout.
     */
    if (cl_args->which_stdin == STDIN_NONE)
	conns[0] = 0;
    else if (cl_args->which_stdin == STDIN_ONE) {
	if (spawn == 0)
	    conns[0] = 1;
	else
	    conns[0] = 0;  /* already connected the single stdin */
    } else if (cl_args->which_stdin == STDIN_ALL) {
	/* total processes which connect stdin */
	conns[0] = 0;
	for (i=task_start; i<task_end; i++)
	    conns[0] += tasks[i].num_copies;
    }

    if (cl_args->nostdout)
	conns[1] = conns[2] = 0;
    else
	/* even for p4 and shmem, not with multiplicity */
  	conns[1] = conns[2] = task_end - task_start;

    /*
     * Initialize listener sockets for gm and ib, since these will be
     * used to implement MPI_Abort in the stdio listener later.
     */
    if (cl_args->comm == COMM_MPICH_GM) {
	prepare_gm_startup_ports(gmpi_port);
    } else if (cl_args->comm == COMM_MPICH_IB) {
	master_port = prepare_ib_startup_port(&gmpi_fd[0]);
	gmpi_fd[1] = -1;
    } else if (cl_args->comm == COMM_MPICH_PSM) {
	master_port = prepare_psm_startup_port(&gmpi_fd[0]);
	gmpi_fd[1] = -1;
    } else if (cl_args->comm == COMM_MPICH_RAI) {
	master_port = prepare_rai_startup_port();
	gmpi_fd[0] = -1;
	gmpi_fd[1] = -1;
    } else {
	gmpi_fd[0] = -1;
	gmpi_fd[1] = -1;
    }

    pmi_fd = -1;
    if (cl_args->comm == COMM_MPICH2_PMI) {
	/* stdio listener handles all PMI activity, even startup */
	if (spawn == 0)
	    master_port = prepare_pmi_startup_port(&pmi_fd);
	else
	    master_port = stdio_msg_parent_say_more_tasks(
	                    task_end - task_start, conns);
    }

    /* flush output buffer, else forked child will have the output too */
    fflush(stdout);

    /* fork the listener (unless we're just spawning more tasks) */
    if (spawn == 0)
	stdio_fork(conns, gmpi_fd, pmi_fd);

    if (pmi_fd >= 0)
	close(pmi_fd);  /* child has it now */

    numtasks_waiting_start = 0;
    if (cl_args->comm == COMM_NONE)
	/* do not complain if they exit before all other tasks are up */
	startup_complete = 1;
    else
	startup_complete = 0;

    /*
     * Start signal handling _after_ stdio child is up.
     */
    handle_signals(0, 0, killall);

    /*
     * environment variables common to all tasks
     */
    env_init();

    /* override user env with these */
    if (cl_args->comm == COMM_MPICH_GM) {
	env_add_int("GMPI_MAGIC", atoi(jobid));
	/* PBS always gives us the "mother superior" node first in the list */
	env_add("GMPI_MASTER", nodes[0].name);
	env_add_int("GMPI_PORT", gmpi_port[0]);   /* 1.2.5..10 */
	env_add_int("GMPI_PORT1", gmpi_port[0]);  /* 1.2.4..8a */
	env_add_int("GMPI_PORT2", gmpi_port[1]);
	env_add_int("GMPI_NP", numtasks);
	env_add_int("GMPI_BOARD", -1);

	/* ditto for new MX version */
	env_add_int("MXMPI_MAGIC", atoi(jobid));
	env_add("MXMPI_MASTER", nodes[0].name);
	env_add_int("MXMPI_PORT", gmpi_port[0]);
	env_add_int("MXMPI_NP", numtasks);
	env_add_int("MXMPI_BOARD", -1);

	/* for MACOSX to override default malloc */
	env_add_int("DYLD_FORCE_FLAT_NAMESPACE", 1);
    }

    if (cl_args->comm == COMM_EMP) {
	growstr_t *emphosts = growstr_init();
	for (i=0; i<numtasks; i++)
	    growstr_printf(emphosts, "%s%s", (i > 0 ? " " : ""),
	      nodes[tasks[i].node].mpname);
	env_add("EMPHOSTS", emphosts->s);
	growstr_free(emphosts);
    }
    
    if (cl_args->comm == COMM_MPICH_IB || cl_args->comm == COMM_MPICH_RAI) {
	int len;
	char *cq, *cr;
	env_add("MPIRUN_HOST", nodes[0].name);  /* master address */
	env_add_int("MPIRUN_PORT", master_port);
	env_add_int("MPIRUN_NPROCS", numtasks);
	env_add_int("MPIRUN_ID", atoi(jobid));  /* global job id */
	/*
	 * pmgr_version >= 3 needs this terribly long string in every task.
	 * Since it may be quite large, we do the allocation by hand and
	 * skip some growstr overhead.
	 */
	len = numtasks;  /* separating colons and terminal \0 */
	for (i=0; i<numtasks; i++)
	    len += strlen(nodes[tasks[i].node].name);
	cq = cp = Malloc(len);
	for (i=0; i<numtasks; i++) {
	    for (cr=nodes[tasks[i].node].name; *cr; cr++)
		*cq++ = *cr;
	    *cq++ = ':';
	}
	--cq;
	*cq = '\0';
	env_add("MPIRUN_PROCESSES", cp);
	free(cp);
    }

    if (cl_args->comm == COMM_MPICH2_PMI) {
	growstr_t *hp = growstr_init();
	growstr_printf(hp, "%s:%d", nodes[0].name, master_port);
	env_add("PMI_PORT", hp->s);
	growstr_free(hp);
	if (spawn > 0)
	    env_add_int("PMI_SPAWNED", 1);
    }

    if (cl_args->comm == COMM_PORTALS) {
	growstr_t *nidmap = growstr_init();
	growstr_t *pidmap = growstr_init();
	portals_build_nidpid_maps(spawn, nidmap, pidmap);
	env_add("PTL_NIDMAP", nidmap->s);
	env_add("PTL_PIDMAP", pidmap->s);
	growstr_free(nidmap);
	growstr_free(pidmap);
    	env_add("PTL_IFACE", "eth0");  /* XXX: no way to know */
    }

    if (cl_args->comm == COMM_MPICH_P4 && numtasks > 1)
	master_port = prepare_p4_master_port();

    if (cl_args->comm == COMM_MPICH_PSM) {
	/* We need to generate a uuid of the form
	 * 9dea0f22-39a4-462a-80c9-b60b28cdfd38.  If /usr/bin/uuidgen exists,
	 * we should probably just use that.
	 * 4bytes-2bytes-2bytes-2bytes-6bytes
	 */
	char uuid_packed[16];
	unsigned char *p = (unsigned char *) uuid_packed;
	int fd, rret;
	
	fd = open("/dev/urandom", O_RDONLY);
	if (fd < 0)
	    error_errno("%s: open /dev/urandom", __func__);
	rret = read_full_ret(fd, uuid_packed, sizeof(uuid_packed));
	if (rret < 0)
	    error_errno("%s: read /dev/urandom", __func__);
	if (rret != sizeof(uuid_packed))
	    error("%s: short read /dev/urandom", __func__);
	close(fd);
	psm_uuid = Malloc(37);  /* 16 * 2 + 4 + 1 */
	snprintf(psm_uuid, 37,
		 "%02x%02x%02x%02x-%02x%02x-%02x%02x-"
		 "%02x%02x-%02x%02x%02x%02x%02x%02x",
		 p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
		 p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
	psm_uuid[36] = '\0';
    }

    /*
     * Ports on which to talk to listener process for stdout/stderr
     * connection (if !-nostdout).
     */
    if (stdio_port(1) >= 0)
	env_add_int("MPIEXEC_STDOUT_PORT", stdio_port(1));
    if (stdio_port(2) >= 0)
	env_add_int("MPIEXEC_STDERR_PORT", stdio_port(2));

    /*
     * Add our hostname too, for use by the redir-helper.  And resolve
     * it now via the user's path for use by the spawns.
     */
    if (HAVE_PBSPRO_HELPER) {
	env_add("MPIEXEC_HOST", nodes[0].name);
	mpiexec_redir_helper_path = resolve_exe("mpiexec-redir-helper", 1);
    }


    /* now the env as given from pbs */
    env_add_environ();

    /* if pbs did not give us these, put in some defaults */
    env_add_if_not("PATH", _PATH_DEFPATH);
    env_add_if_not("USER", pswd->pw_name);


    /*
     * Set up for totalview attach.  Returns local port number that will be
     * used in command startup to tell processes how to find us.  These two
     * env vars are necessary in all processes.  The first tells them to
     * consume the tv_ready message.  The second is checked in MPI_Init to
     * determine if they should wait for all processes to be attached by
     * totalview.
     */
    if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI) {
	env_add_int("PMI_TOTALVIEW", 1);
	env_add_int("MPIEXEC_DEBUG", 1);
	tv_port = tv_startup(task_end - task_start);
    }

    /*
     * Spawn each task, adding its private env vars.
     * numspawned set to zero earlier before signal handler setup;
     * both it and i walk the iterations in the loop.
     */
    for (i=task_start; i<task_end; i++) {
	env_push();
	if (cl_args->comm == COMM_MPICH_GM) {
	    /* build proc-specific gmpi_opts in envp */
	    env_add_int("GMPI_ID", i);
	    env_add_int("MXMPI_ID", i);
	    env_add("GMPI_SLAVE", nodes[tasks[i].node].name);  /* 1.2.5..10 */
	}
	if (cl_args->comm == COMM_SHMEM) {
	    /* earlier in get_hosts we checked that there is only one task */
	    env_add_int("MPICH_NP", tasks[0].num_copies);
	}

	if (cl_args->comm == COMM_MPICH_IB || cl_args->comm == COMM_MPICH_RAI)
	    env_add_int("MPIRUN_RANK", i);

	if (cl_args->comm == COMM_MPICH_IB) {
	    /* hack for topspin adaptation of mvapich 0.9.2 */
	    env_add("MPIRUN_NODENAME", nodes[tasks[i].node].name);
	}

	if (cl_args->comm == COMM_MPICH2_PMI) {
	    /* task id is always 0-based, even for spawn  */
	    env_add_int("PMI_ID", i - task_start);
	    if (strcmp(nodes[tasks[i].node].mpname,
	               nodes[tasks[i].node].name) != 0)
		env_add("MPICH_INTERFACE_HOSTNAME",
		        nodes[tasks[i].node].mpname);
	}

	if (cl_args->comm == COMM_MPICH_PSM) {
	    /* build one big string with everything in it */
	    char buf[2048];
	    snprintf(buf, sizeof(buf) - 1,
		     "%d %d %s %d %d %d %d %d %d %d %s",
		     0,    /* protocol version */
		     0x4,  /* protocol flags, ASYNC_SHUTDOWN=0x4 */
		     nodes[0].name,  /* spawner host */
		     master_port,    /* spawner port */
		     atoi(jobid),    /* spawner jobid */
		     numtasks,       /* COMM_WORLD size */
		     i - task_start, /* COMM_WORLD rank for this process */
		     nodes[tasks[i].node].numcpu, /* num local ranks */
		     tasks[i].cpu_index[0],       /* my local rank */
		     60, /* timeout... */
		     psm_uuid);
	    buf[sizeof(buf) - 1] = '\0';
	    env_add("MPI_SPAWNER", buf);
	}

	if (cl_args->comm == COMM_PORTALS)
	    env_add_int("PTL_MY_RID", i);

	if (cl_args->comm == COMM_NONE)
	    env_add_int("MPIEXEC_RANK", i);
        
	/* either no stdin, or just to proc #0, or to all of them */
	if (cl_args->which_stdin == STDIN_ONE && i == 0) {
	    env_add_int("MPIEXEC_STDIN_PORT", stdio_port(0));
	    /* do not add _HOST for p4, since we don't want
	     * the children of the big or remote master to
	     * connect.  This _PORT is just for PBS, not for MPICH.  */
	}
	if (cl_args->which_stdin == STDIN_ALL) {
	    env_add_int("MPIEXEC_STDIN_PORT", stdio_port(0));
	    if (cl_args->comm == COMM_MPICH_P4)
		/* slave processes need to be told which host, as the stdin
		 * connection happens not in pbs_mom, but in mpich/p4 library
		 * code when it spawns each of the other tasks. */
		env_add("MPIEXEC_STDIN_HOST", nodes[0].name);
	}

	env_terminate();

	/* build proc-specific command line */
	growstr_zero(g);
	g->translate_single_quote = 0;

	/*
	 * Totalview is a bit odd, even hackish perhaps.  Send the pid
	 * the just-starting process to ourselves via /dev/tcp, some sort
	 * of virtual device that makes a TCP connection as told and sends
	 * the echoed data.
	 */
	if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
	    growstr_printf(g, "if hash nc > /dev/null; then printf %%10d $$ | nc %s %d; else printf %%10d $$ > /dev/tcp/%s/%d; fi; "
	    		   "if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
			   nodes[0].name, tv_port,
			   nodes[0].name, tv_port,
			   pwd, pwd, user_shell);
	else
	    growstr_printf(g,
			   "if test -d \"%s\"; then cd \"%s\"; fi; exec %s -c ",
			   pwd, pwd, user_shell);
	growstr_append(g, "'exec ");
	g->translate_single_quote = 1;

	/*
	 * PBSPro environments do not know how to redirect standard streams.
	 * So we fork a helper program that lives in the user's PATH, hopefully
	 * the same place as mpiexec, that does the redirection then execs the
	 * actual executable.  This will break on OpenPBS or Torque, although
	 * I guess the redir helper could unset the env vars, but I'd rather
	 * people just didn't use the redir helper in that case.
	 */
	if (HAVE_PBSPRO_HELPER)
	    growstr_printf(g, "%s ", mpiexec_redir_helper_path);

	/*
	 * The executable, or a debugger wrapper around it.  In the mpich2
	 * case we don't need any special args.
	 */
	if (cl_args->tview && cl_args->comm != COMM_MPICH2_PMI) {
	    if (i == 0)
		growstr_printf(g, "%s %s -a -mpichtv", tvname,
			       tasks[i].conf->exe);
	    else
		growstr_printf(g, "%s -mpichtv", tasks[i].conf->exe);
	} else
	    growstr_printf(g, "%s", tasks[i].conf->exe);

	/* process arguments _before_ p4 arguments to allow xterm/gdb hack */
	if (tasks[i].conf->args)
	    growstr_printf(g, " %s", tasks[i].conf->args);

	if (cl_args->comm == COMM_MPICH_P4) {
	    /*
	     * Pass the cwd to ch_p4, else it tries to chdir(exedir).  Thanks
	     * to Ben Webb <*****@*****.**> for fixing this.
	     */
	    growstr_printf(g, " -p4wd %s", pwd);

	    /* The actual flag names are just for debugging; they're not used
	     * but the order is important. */
	    growstr_printf(g, " -execer_id mpiexec");
	    growstr_printf(g, " -master_host %s", nodes[tasks[0].node].mpname);
	    growstr_printf(g, " -my_hostname %s", nodes[tasks[i].node].mpname);
	    growstr_printf(g, " -my_nodenum %d", i);
	    growstr_printf(g, " -my_numprocs %d", tasks[i].num_copies);
	    growstr_printf(g, " -total_numnodes %d", numtasks);
	    growstr_printf(g, " -master_port %d", master_port);
	    if (i == 0 && numtasks > 1) {
		int j;
		/* list of: <hostname> <procs-on-that-node> */
		growstr_printf(g, " -remote_info");
		for (j=1; j<numtasks; j++)
		    growstr_printf(g, " %s %d",
		      nodes[tasks[j].node].mpname, tasks[j].num_copies);
	    }
	}

	g->translate_single_quote = 0;
	growstr_printf(g, "'");  /* close quote for 'exec myjob ...' */
	nargv[2] = g->s;

	/*
	 * Dump all the info if sufficiently verbose.
	 */
	debug(2, "%s: command to %d/%d %s: %s", __func__, i, numtasks,
	  nodes[tasks[i].node].name, nargv[2]);
	if (cl_args->verbose > 2) {
	    int j;
	    debug(3, "%s: environment to %d/%d %s", __func__, i,
	      numtasks, nodes[tasks[i].node].name);
	    for (j=0; (cp = envp[j]); j++)
		printf("env %2d %s\n", j, cp);
	}

	if (concurrent_master) {
	    tm_event_t evt;
	    int err;

	    /* Note, would like to add obit immediately, but that is
	     * not allowed until the START message is polled.
	     */
	    err = tm_spawn(list_count(nargv), nargv, envp,
	                   nodes[tasks[i].node].ids[tasks[i].cpu_index[0]],
			   &tasks[i].tid, &evt);
	    if (err != TM_SUCCESS)
		error_tm(err, "%s: tm_spawn task %d", __func__, i);
	    evt_add(evt, -1, i, EVT_START);
	} else {
	    concurrent_request_spawn(i, list_count(nargv), nargv, envp,
	      nodes[tasks[i].node].ids[tasks[i].cpu_index[0]]);
	}
	tasks[i].done = DONE_NOT;  /* has now been started */
	env_pop();
	++numspawned;
	++numtasks_waiting_start;

	if (cl_args->comm == COMM_MPICH_P4 && i == 0 && numtasks > 1) {
	    ret = wait_task_start();
	    if (ret)
		break;  /* don't bother trying to start the rest */
	    ret = read_p4_master_port(&master_port);
	    if (ret)
		break;
	}

	/*
	 * Pay attention to incoming tasks so they don't time out while
	 * we're starting up all the others, non blocking.
	 */
	if (cl_args->comm == COMM_MPICH_IB) {
	    int one = 1;
	    for (;;) {
		ret = service_ib_startup(one);
		one = 0;  /* only report the new task that first time */
		if (ret < 0) {
		    ret = 1;
		    goto out;
		}
		if (ret == 0)  /* nothing accomplished */
		    break;
	    }
	}
	if (cl_args->comm == COMM_MPICH_GM) {
	    int one = 1;
	    for (;;) {
		ret = service_gm_startup(one);
		one = 0;  /* only report the new task that first time */
		if (ret < 0) {
		    ret = 1;
		    goto out;
		}
		if (ret == 0)  /* nothing accomplished */
		    break;
	    }
	}
	if (cl_args->comm == COMM_MPICH_PSM) {
	    int one = 1;
	    for (;;) {
		ret = service_psm_startup(one);
		one = 0;  /* only report the new task that first time */
		if (ret < 0) {
		    ret = 1;
		    goto out;
		}
		if (ret == 0)  /* nothing accomplished */
		    break;
	    }
	}
	if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
	    tv_accept_one(i);
    }

    if (cl_args->tview && cl_args->comm == COMM_MPICH2_PMI)
       tv_complete();

    /* don't need these anymore */
    free(nargv[0]);
    free(nargv[1]);
    growstr_free(g);

    if (cl_args->comm == COMM_MPICH_PSM)
	free(psm_uuid);
    if (ret)
	goto out;

    /*
     * Wait for spawn events and submit obit requests.
     */
    while (numtasks_waiting_start) {
	ret = wait_task_start();
	if (ret)
	    goto out;
    }

    debug(1, "All %d task%s (spawn %d) started", task_end - task_start,
          task_end - task_start > 1 ? "s": "", spawn);

    /*
     * Finalize mpi-specific startup protocal, e.g. wait for all tasks to
     * checkin, perform barrier, etc.
     */
    if (cl_args->comm == COMM_MPICH_GM)
	ret = read_gm_startup_ports();

    if (cl_args->comm == COMM_MPICH_IB)
	ret = read_ib_startup_ports();

    if (cl_args->comm == COMM_MPICH_PSM)
	ret = read_psm_startup_ports();

    if (cl_args->comm == COMM_MPICH_RAI)
	ret = read_rai_startup_ports();

    if (ret == 0)
	startup_complete = 1;

  out:
    return ret;
}