Exemplo n.º 1
0
Arquivo: setup.c Projeto: Cray/slurm
/*
 * Poll until _tasks_launched() reports non-zero, or give up after a
 * timeout, then report the outcome to the spawner srun.
 *
 * The response carries job_info.spawn_seq, a copy of job_info.pmi_jobid,
 * tree_info.pmi_port, and rc = 0 on launch / rc = 1 on timeout.
 *
 * IN unused - ignored
 * RET always NULL
 *
 * NOTE(review): the void *(void *) signature looks like a thread start
 * routine -- confirm against the caller.
 */
static void *
_task_launch_detection(void *unused)
{
	enum {
		POLL_INTERVAL_USEC = 50 * 1000,	/* sleep between polls */
		LAUNCH_TIMEOUT_SEC = 600	/* give up after this long */
	};
	spawn_resp_t *reply;
	int timed_out = 0;
	time_t began;

	/*
	 * mpir_init() is called in plugins/launch/slurm/launch_slurm.c before
	 * mpi_hook_client_prelaunch() is called in api/step_launch.c
	 */
	began = time(NULL);
	for (;;) {
		if (_tasks_launched() != 0)
			break;

		usleep(POLL_INTERVAL_USEC);

		if (time(NULL) - began > LAUNCH_TIMEOUT_SEC) {
			timed_out = 1;
			break;
		}
	}

	/* build the response for the spawner srun */
	reply = spawn_resp_new();
	reply->seq = job_info.spawn_seq;
	reply->jobid = xstrdup(job_info.pmi_jobid);
	reply->error_cnt = 0;	/* TODO */
	reply->rc = timed_out;
	reply->pmi_port = tree_info.pmi_port;

	/* deliver it, then release our copy */
	spawn_resp_send_to_srun(reply);
	spawn_resp_free(reply);

	return NULL;
}
Exemplo n.º 2
0
/*
 * Prepare the environment for a spawned srun and exec it.
 *
 * A copy of job_info.job_env is overlaid with SLURM_JOB_ID, the PMI2
 * spawner/job-id/sequence/port variables and the request's preput
 * key/value pairs, then passed to _exec_srun_single() (exactly one
 * subcommand) or _exec_srun_multiple() (otherwise).
 *
 * Those helpers do not return on success, so the code after them is the
 * failure path: a spawn response carrying the helper's return code is
 * sent to 127.0.0.1:<tree_info.pmi_port>, and the process exits with the
 * errno value observed right after the failed exec helper.
 *
 * IN req - spawn request (seq, preput_cnt, pp_keys, pp_vals, subcmd_cnt
 *          are read here)
 *
 * This function never returns; it either execs or calls exit().
 */
static void
_setup_exec_srun(spawn_req_t *req)
{
	char **env, env_key[32];
	int i, rc, exec_errno;
	spawn_resp_t *resp;

	debug3("mpi/pmi2: in _setup_exec_srun");

	/* setup environments */
	env = env_array_copy((const char **)job_info.job_env);
	/* TODO: unset some env-vars */

	env_array_overwrite_fmt(&env, "SLURM_JOB_ID", "%u", job_info.jobid);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_JOBID_ENV, "%s",
				job_info.pmi_jobid);
	env_array_overwrite_fmt(&env, PMI2_PMI_JOBID_ENV, "%s-%u",
				job_info.pmi_jobid, req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWN_SEQ_ENV, "%u", req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_PORT_ENV, "%hu",
				tree_info.pmi_port);
	/* preput kvs: one <key prefix><i> / <value prefix><i> pair per entry */
	env_array_overwrite_fmt(&env, PMI2_PREPUT_CNT_ENV, "%d",
				req->preput_cnt);
	for (i = 0; i < req->preput_cnt; i ++) {
		snprintf(env_key, sizeof(env_key), PMI2_PPKEY_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s", req->pp_keys[i]);
		snprintf(env_key, sizeof(env_key), PMI2_PPVAL_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s", req->pp_vals[i]);
	}

	if (req->subcmd_cnt == 1) {
		/* no return if success */
		rc = _exec_srun_single(req, env);
	} else {
		/* no return if success */
		rc = _exec_srun_multiple(req, env);
	}

	/*
	 * Only reached on failure. Save errno now: the allocation and
	 * messaging calls below may overwrite it before we exit.
	 */
	exec_errno = errno;

	resp = spawn_resp_new();
	resp->seq = req->seq;
	xstrfmtcat(resp->jobid, "%s-%u", job_info.pmi_jobid, req->seq);
	resp->error_cnt = 0;
	resp->rc = rc;

	/* fake a srun address */
	tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
	slurm_set_addr(tree_info.srun_addr, tree_info.pmi_port,
		       "127.0.0.1");
	spawn_resp_send_to_srun(resp);
	spawn_resp_free(resp);

	/*
	 * NOTE(review): if the exec helper failed without setting errno,
	 * this exits with status 0 -- confirm whether that is intended.
	 */
	exit(exec_errno);
}