int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
				char ***env)
{
	int i;

	env_array_overwrite_fmt(env, "PMI_FD", "%u",
				TASK_PMI_SOCK(job->ltaskid));
	env_array_overwrite_fmt(env, "PMI_JOBID", "%s", job_info.pmi_jobid);
	env_array_overwrite_fmt(env, "PMI_RANK", "%u", job->gtaskid);
	env_array_overwrite_fmt(env, "PMI_SIZE", "%u", job->ntasks);
	if (job_info.spawn_seq) {
		/* PMI1.1 needs this env-var */
		env_array_overwrite_fmt(env, "PMI_SPAWNED", "%u", 1);
	}

	/* close unused sockets in task */
	close(tree_sock);
	tree_sock = 0;
	for (i = 0; i < job->ltasks; i++) {
		close(STEPD_PMI_SOCK(i));
		STEPD_PMI_SOCK(i) = 0;
		if (i != job->ltaskid) {
			close(TASK_PMI_SOCK(i));
			TASK_PMI_SOCK(i) = 0;
		}
	}

	return SLURM_SUCCESS;
}
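/*
 * Editor's illustrative sketch, not part of the plugin source: how a PMI-1
 * client library inside the launched task would typically recover the
 * socket and identity passed down via the PMI_* variables set above.  The
 * variable names match the code above; the function itself is hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

int pmi_client_init_sketch(void)
{
	const char *fd_str   = getenv("PMI_FD");
	const char *rank_str = getenv("PMI_RANK");
	const char *size_str = getenv("PMI_SIZE");

	if (!fd_str || !rank_str || !size_str)
		return -1;	/* not launched under this plugin */

	int pmi_fd = atoi(fd_str);	/* inherited, already-connected socket */
	int rank   = atoi(rank_str);
	int size   = atoi(size_str);

	/* a real client would now speak the PMI wire protocol over pmi_fd */
	printf("rank %d of %d, PMI socket fd %d\n", rank, size, pmi_fd);
	return 0;
}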
/*
 * task_p_pre_launch() is called prior to exec of application task.
 * It is followed by TaskProlog program (from slurm.conf) and
 * --task-prolog (from srun command line).
 */
extern int task_p_pre_launch (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	int rc;
	uint64_t apid;
	DEF_TIMERS;

	START_TIMER;
	apid = SLURM_ID_HASH(job->jobid, job->stepid);
	debug2("task_p_pre_launch: %u.%u, apid %"PRIu64", task %d",
	       job->jobid, job->stepid, apid, job->envtp->procid);

	/*
	 * Send the rank to the application's PMI layer via an environment
	 * variable.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_PE_ENV,
				     "%d", job->envtp->procid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_PE_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the PMI_NO_FORK environment variable.
	 */
	rc = env_array_overwrite(&job->env, PMI_NO_FORK_ENV, "1");
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", PMI_NO_FORK_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Notify the task which offset to use
	 */
	rc = env_array_overwrite_fmt(&job->env, LLI_STATUS_OFFS_ENV,
				     "%d", job->envtp->localid + 1);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", LLI_STATUS_OFFS_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the ALPS_APP_ID environment variable for use by
	 * Cray tools.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_ID_ENV,
				     "%"PRIu64, apid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_ID_ENV);
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
#endif
	return SLURM_SUCCESS;
}
int p_mpi_hook_slurmstepd_task (const mpi_plugin_client_info_t *job,
				char ***env)
{
	char *nodelist, *task_cnt;

	nodelist = getenvp(*env, "SLURM_NODELIST");
	if (nodelist) {
		char *host_str = NULL, *tmp;
		hostlist_t hl = hostlist_create(nodelist);
		while ((tmp = hostlist_shift(hl))) {
			if (host_str)
				xstrcat(host_str, ",");
			xstrcat(host_str, tmp);
			free(tmp);
		}
		hostlist_destroy(hl);
		env_array_overwrite_fmt(env, "SLURM_MPICH_NODELIST", "%s",
					host_str);
		xfree(host_str);
	}

	task_cnt = getenvp(*env, "SLURM_TASKS_PER_NODE");
	if (task_cnt) {
		char *task_str = NULL, tmp_str[32];
		int i = 0, val, reps;
		while (task_cnt[i]) {
			if ((task_cnt[i] >= '0') && (task_cnt[i] <= '9'))
				val = atoi(&task_cnt[i]);
			else
				break;	/* bad parse */
			i++;
			while (task_cnt[i] && (task_cnt[i] != 'x') &&
			       (task_cnt[i] != ','))
				i++;
			if (task_cnt[i] == 'x') {
				i++;
				reps = atoi(&task_cnt[i]);
				while (task_cnt[i] && (task_cnt[i] != ','))
					i++;
			} else
				reps = 1;
			if (task_cnt[i] == ',')
				i++;
			while (reps) {
				if (task_str)
					xstrcat(task_str, ",");
				snprintf(tmp_str, sizeof(tmp_str), "%d", val);
				xstrcat(task_str, tmp_str);
				reps--;
			}
		}
		env_array_overwrite_fmt(env, "SLURM_MPICH_TASKS", "%s",
					task_str);
		xfree(task_str);
	}

	return SLURM_SUCCESS;
}
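/*
 * Editor's standalone sketch of what the parser above does, assuming the
 * compressed SLURM_TASKS_PER_NODE syntax "2(x3),1" ("2 tasks on each of 3
 * nodes, then 1 task").  It uses no SLURM internals and simply prints the
 * expanded per-node list that would end up in SLURM_MPICH_TASKS.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *task_cnt = "2(x3),1";	/* example input */
	int i = 0, val, reps;

	while (task_cnt[i]) {
		if ((task_cnt[i] < '0') || (task_cnt[i] > '9'))
			break;			/* bad parse */
		val = atoi(&task_cnt[i]);
		i++;
		while (task_cnt[i] && (task_cnt[i] != 'x') &&
		       (task_cnt[i] != ','))
			i++;			/* skip "(" */
		if (task_cnt[i] == 'x') {
			i++;
			reps = atoi(&task_cnt[i]);
			while (task_cnt[i] && (task_cnt[i] != ','))
				i++;		/* skip ")" */
		} else
			reps = 1;
		if (task_cnt[i] == ',')
			i++;
		while (reps--)
			printf("%d,", val);	/* prints: 2,2,2,1, */
	}
	printf("\n");
	return 0;
}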
static int
_setup_srun_environ(const mpi_plugin_client_info_t *job, char ***env)
{
	/* ifhn will be set in SLURM_SRUN_COMM_HOST by slurmd */
	env_array_overwrite_fmt(env, PMI2_SRUN_PORT_ENV, "%hu",
				tree_info.pmi_port);
	env_array_overwrite_fmt(env, PMI2_STEP_NODES_ENV, "%s",
				job_info.step_nodelist);
	env_array_overwrite_fmt(env, PMI2_PROC_MAPPING_ENV, "%s",
				job_info.proc_mapping);
	return SLURM_SUCCESS;
}
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
			       char ***env)
{
	char addrbuf[1024];
	char *p;
	char *addr = getenvp(*env, "SLURM_LAUNCH_NODE_IPADDR");

	debug("Using mpi/mpich-gm");
	slurm_print_slurm_addr(job->self, addrbuf, sizeof(addrbuf));

	if ((p = strchr(addrbuf, ':')) != NULL)
		*p = '\0';

	env_array_overwrite_fmt(env, "GMPI_MASTER", "%s", addr);
	env_array_overwrite_fmt(env, "GMPI_SLAVE", "%s", addrbuf);
	env_array_overwrite_fmt(env, "GMPI_ID", "%u", job->gtaskid);
	if (!getenv("GMPI_RECV")) {
		env_array_overwrite_fmt(env, "GMPI_RECV", "%s", "hybrid");
	}

	env_array_overwrite_fmt(env, "MXMPI_MASTER", "%s", addr);
	env_array_overwrite_fmt(env, "MXMPI_ID", "%u", job->gtaskid);
	env_array_overwrite_fmt(env, "MXMPI_SLAVE", "%s", addrbuf);
	if (!getenv("MXMPI_RECV")) {
		env_array_overwrite_fmt(env, "MXMPI_RECV", "%s", "hybrid");
	}

	debug2("init for mpi rank %u", job->gtaskid);
	return SLURM_SUCCESS;
}
static void _setup_exec_srun(spawn_req_t *req)
{
	char **env, env_key[32];
	int i, rc;
	spawn_resp_t *resp;

	debug3("mpi/pmi2: in _setup_exec_srun");

	/* setup environments */
	env = env_array_copy((const char **)job_info.job_env);
	/* TODO: unset some env-vars */

	env_array_overwrite_fmt(&env, "SLURM_JOB_ID", "%u", job_info.jobid);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_JOBID_ENV, "%s",
				job_info.pmi_jobid);
	env_array_overwrite_fmt(&env, PMI2_PMI_JOBID_ENV, "%s-%u",
				job_info.pmi_jobid, req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWN_SEQ_ENV, "%u", req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_PORT_ENV, "%hu",
				tree_info.pmi_port);
	/* preput kvs */
	env_array_overwrite_fmt(&env, PMI2_PREPUT_CNT_ENV, "%d",
				req->preput_cnt);
	for (i = 0; i < req->preput_cnt; i++) {
		snprintf(env_key, 32, PMI2_PPKEY_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s", req->pp_keys[i]);
		snprintf(env_key, 32, PMI2_PPVAL_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s", req->pp_vals[i]);
	}

	if (req->subcmd_cnt == 1) {
		/* no return if success */
		rc = _exec_srun_single(req, env);
	} else {
		/* no return if success */
		rc = _exec_srun_multiple(req, env);
	}

	resp = spawn_resp_new();
	resp->seq = req->seq;
	xstrfmtcat(resp->jobid, "%s-%u", job_info.pmi_jobid, req->seq);
	resp->error_cnt = 0;
	resp->rc = rc;

	/* fake a srun address */
	tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
	slurm_set_addr(tree_info.srun_addr, tree_info.pmi_port, "127.0.0.1");
	spawn_resp_send_to_srun(resp);
	spawn_resp_free(resp);

	exit(errno);
}
/*
 * task_p_pre_launch() is called prior to exec of application task.
 * It is followed by TaskProlog program (from slurm.conf) and
 * --task-prolog (from srun command line).
 */
extern int task_p_pre_launch (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	int rc;

	debug("task_p_pre_launch: %u.%u, task %d",
	      job->jobid, job->stepid, job->envtp->procid);

	/*
	 * Send the rank to the application's PMI layer via an environment
	 * variable.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_PE_ENV,
				     "%d", job->envtp->procid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_PE_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the PMI_NO_FORK environment variable.
	 */
	rc = env_array_overwrite(&job->env, PMI_NO_FORK_ENV, "1");
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", PMI_NO_FORK_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Notify the task which offset to use
	 */
	rc = env_array_overwrite_fmt(&job->env, LLI_STATUS_OFFS_ENV,
				     "%d", job->envtp->localid + 1);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", LLI_STATUS_OFFS_ENV);
		return SLURM_ERROR;
	}
#endif
	return SLURM_SUCCESS;
}
int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
				char ***env)
{
	int i;
	char *processes = NULL;
	char *addr = getenvp(*env, "SLURM_LAUNCH_NODE_IPADDR");

	debug("Using mpi/mvapich");
	env_array_overwrite_fmt(env, "MPIRUN_HOST", "%s", addr);
	env_array_overwrite_fmt(env, "MPIRUN_RANK", "%u", job->gtaskid);
	env_array_overwrite_fmt(env, "MPIRUN_MPD", "0");

	debug2("init for mpi rank %u", job->gtaskid);

	if (getenvp(*env, "SLURM_NEED_MVAPICH_MPIRUN_PROCESSES")) {
		/*
		 * Fake MPIRUN_PROCESSES env var -- we don't need this for
		 * SLURM at this time. (what a waste)
		 */
		for (i = 0; i < job->ntasks; i++)
			xstrcat(processes, "x:");

		env_array_overwrite_fmt(env, "MPIRUN_PROCESSES", "%s",
					processes);
	}

	/*
	 * Some mvapich versions will ignore MPIRUN_PROCESSES if
	 * the following env var is set.
	 */
	env_array_overwrite_fmt(env, "NOT_USE_TOTALVIEW", "1");

	/*
	 * Set VIADEV_ENABLE_AFFINITY=0 so that mvapich doesn't
	 * override SLURM's CPU affinity. (Unless this var is
	 * already set in user env)
	 */
	if (!getenvp(*env, "VIADEV_ENABLE_AFFINITY"))
		env_array_overwrite_fmt(env, "VIADEV_ENABLE_AFFINITY", "0");

	return SLURM_SUCCESS;
}
extern gmpi_state_t *
gmpi_thr_create(const mpi_plugin_client_info_t *job, char ***env)
{
	uint16_t port;
	pthread_attr_t attr;
	gmpi_state_t *st = NULL;

	st = gmpi_state_create(job);

	/*
	 * It is possible for one to modify the mpirun command in
	 * MPICH-GM distribution so that it calls srun, instead of
	 * rsh, for remote process invocations. In that case, we
	 * should not override envs nor open the master port.
	 */
	if (getenv("GMPI_PORT"))
		return st;

	if (net_stream_listen(&st->fd, &port) < 0) {
		error("Unable to create GMPI listen port: %m");
		gmpi_state_destroy(st);
		return NULL;
	}

	/*
	 * Accept in a separate thread.
	 */
	slurm_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&st->tid, &attr, &_gmpi_thr, (void *)st)) {
		slurm_attr_destroy(&attr);
		gmpi_state_destroy(st);
		return NULL;
	}
	slurm_attr_destroy(&attr);

	env_array_overwrite_fmt(env, "GMPI_PORT", "%hu", port);
	env_array_overwrite_fmt(env, "GMPI_MAGIC", "%u", job->jobid);
	env_array_overwrite_fmt(env, "GMPI_NP", "%d",
				job->step_layout->task_cnt);
	env_array_overwrite_fmt(env, "GMPI_SHMEM", "1");
	/* FIXME for multi-board config. */
	env_array_overwrite_fmt(env, "GMPI_BOARD", "-1");

	/* For new MX version */
	env_array_overwrite_fmt(env, "MXMPI_PORT", "%hu", port);
	env_array_overwrite_fmt(env, "MXMPI_MAGIC", "%u", job->jobid);
	env_array_overwrite_fmt(env, "MXMPI_NP", "%d",
				job->step_layout->task_cnt);
	/* FIXME for multi-board config. */
	env_array_overwrite_fmt(env, "MXMPI_BOARD", "-1");

	/* for MACOSX to override default malloc */
	env_array_overwrite_fmt(env, "DYLD_FORCE_FLAT_NAMESPACE", "1");

	debug("Started GMPI master thread (%lu)", (unsigned long) st->tid);

	return st;
}
/*
 * Set in "dest" the environment variables relevant to a SLURM job
 * allocation, overwriting any environment variables of the same name.
 * If the address pointed to by "dest" is NULL, memory will automatically be
 * xmalloc'ed. The array is terminated by a NULL pointer, and thus is
 * suitable for use by execle() and other env_array_* functions.
 *
 * Sets the variables:
 *	SLURM_JOB_ID
 *	SLURM_JOB_NUM_NODES
 *	SLURM_JOB_NODELIST
 *	SLURM_JOB_CPUS_PER_NODE
 *	LOADLBATCH (AIX only)
 *	SLURM_BG_NUM_NODES, MPIRUN_PARTITION, MPIRUN_NOFREE, and
 *	MPIRUN_NOALLOCATE (BG only)
 *
 * Sets OBSOLETE variables (needed for MPI, do not remove):
 *	SLURM_JOBID
 *	SLURM_NNODES
 *	SLURM_NODELIST
 *	SLURM_TASKS_PER_NODE
 */
int env_array_for_job(char ***dest,
		      const resource_allocation_response_msg_t *alloc,
		      const job_desc_msg_t *desc)
{
	char *tmp = NULL;
	char *dist = NULL, *lllp_dist = NULL;
	slurm_step_layout_t *step_layout = NULL;
	uint32_t num_tasks = desc->num_tasks;
	int rc = SLURM_SUCCESS;
	uint32_t node_cnt = alloc->node_cnt;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	_setup_particulars(cluster_flags, dest, alloc->select_jobinfo);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(alloc->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
		if (!node_cnt)
			node_cnt = alloc->node_cnt;

		env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES",
					"%u", node_cnt);
	}

	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s",
				alloc->node_list);

	_set_distribution(desc->task_dist, &dist, &lllp_dist);
	if (dist)
		env_array_overwrite_fmt(dest, "SLURM_DISTRIBUTION", "%s",
					dist);

	if (desc->task_dist == SLURM_DIST_PLANE)
		env_array_overwrite_fmt(dest, "SLURM_DIST_PLANESIZE",
					"%u", desc->plane_size);

	if (lllp_dist)
		env_array_overwrite_fmt(dest, "SLURM_DIST_LLLP", "%s",
					lllp_dist);

	tmp = uint32_compressed_to_str(alloc->num_cpu_groups,
				       alloc->cpus_per_node,
				       alloc->cpu_count_reps);
	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
	xfree(tmp);

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id);
	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s",
				alloc->node_list);

	if (num_tasks == NO_VAL) {
		/*
		 * No task count was given, so derive one by totalling up
		 * the allocated cpus and dividing by the number of cpus
		 * per task; it is needed to set SLURM_TASKS_PER_NODE below.
		 */
		int i;
		num_tasks = 0;
		for (i = 0; i < alloc->num_cpu_groups; i++) {
			num_tasks += alloc->cpu_count_reps[i]
				* alloc->cpus_per_node[i];
		}
		if ((int)desc->cpus_per_task > 1
		    && desc->cpus_per_task != (uint16_t)NO_VAL)
			num_tasks /= desc->cpus_per_task;
		//num_tasks = desc->min_cpus;
	}

	if (desc->task_dist == SLURM_DIST_ARBITRARY) {
		tmp = desc->req_nodes;
		env_array_overwrite_fmt(dest, "SLURM_ARBITRARY_NODELIST",
					"%s", tmp);
	} else
		tmp = alloc->node_list;

	if (!(step_layout = slurm_step_layout_create(tmp,
						     alloc->cpus_per_node,
						     alloc->cpu_count_reps,
						     node_cnt,
						     num_tasks,
						     desc->cpus_per_task,
						     desc->task_dist,
						     desc->plane_size)))
		return SLURM_ERROR;

	tmp = _uint16_array_to_str(step_layout->node_cnt, step_layout->tasks);
	slurm_step_layout_destroy(step_layout);
	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
	xfree(tmp);

	return rc;
}
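/*
 * Editor's illustrative sketch of the run-length encoding that
 * uint32_compressed_to_str() is expected to produce for
 * SLURM_JOB_CPUS_PER_NODE: e.g. cpus_per_node = {4, 2} with
 * cpu_count_reps = {3, 1} becomes "4(x3),2".  The real helper lives in
 * SLURM; this standalone version only demonstrates the format.
 */
#include <stdint.h>
#include <stdio.h>

static void print_compressed(int ngroups, const uint32_t *cpus,
			     const uint32_t *reps)
{
	int i;

	for (i = 0; i < ngroups; i++) {
		if (i)
			printf(",");
		if (reps[i] > 1)
			printf("%u(x%u)", cpus[i], reps[i]);
		else
			printf("%u", cpus[i]);
	}
	printf("\n");
}

int main(void)
{
	uint32_t cpus[] = { 4, 2 };
	uint32_t reps[] = { 3, 1 };

	print_compressed(2, cpus, reps);	/* prints: 4(x3),2 */
	return 0;
}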
/*
 * Set in "dest" the environment variables relevant to a SLURM job step,
 * overwriting any environment variables of the same name. If the address
 * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed.
 * The array is terminated by a NULL pointer, and thus is suitable for
 * use by execle() and other env_array_* functions. If preserve_env is
 * true, the variables SLURM_NNODES, SLURM_NTASKS and SLURM_TASKS_PER_NODE
 * remain unchanged.
 *
 * Sets variables:
 *	SLURM_STEP_ID
 *	SLURM_STEP_NUM_NODES
 *	SLURM_STEP_NUM_TASKS
 *	SLURM_STEP_TASKS_PER_NODE
 *	SLURM_STEP_LAUNCHER_PORT
 *	SLURM_STEP_LAUNCHER_IPADDR
 *	SLURM_STEP_RESV_PORTS
 *
 * Sets OBSOLETE variables:
 *	SLURM_STEPID
 *	SLURM_NNODES
 *	SLURM_NTASKS
 *	SLURM_NODELIST
 *	SLURM_TASKS_PER_NODE
 *	SLURM_SRUN_COMM_PORT
 *	SLURM_LAUNCH_NODE_IPADDR
 */
void
env_array_for_step(char ***dest,
		   const job_step_create_response_msg_t *step,
		   uint16_t launcher_port,
		   bool preserve_env)
{
	char *tmp;

	tmp = _uint16_array_to_str(step->step_layout->node_cnt,
				   step->step_layout->tasks);
	env_array_overwrite_fmt(dest, "SLURM_STEP_ID", "%u",
				step->job_step_id);
	env_array_overwrite_fmt(dest, "SLURM_STEP_NODELIST",
				"%s", step->step_layout->node_list);
	env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_NODES", "%hu",
				step->step_layout->node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_TASKS", "%u",
				step->step_layout->task_cnt);
	env_array_overwrite_fmt(dest, "SLURM_STEP_TASKS_PER_NODE",
				"%s", tmp);
	env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_PORT",
				"%hu", launcher_port);
	if (step->resv_ports) {
		env_array_overwrite_fmt(dest, "SLURM_STEP_RESV_PORTS",
					"%s", step->resv_ports);
	}

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_STEPID", "%u",
				step->job_step_id);
	if (!preserve_env) {
		env_array_overwrite_fmt(dest, "SLURM_NNODES", "%hu",
					step->step_layout->node_cnt);
		env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u",
					step->step_layout->task_cnt);
		/* keep around for old scripts */
		env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u",
					step->step_layout->task_cnt);
		env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE",
					"%s", tmp);
	}
	env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_PORT", "%hu",
				launcher_port);

	xfree(tmp);
}
/*
 * Set in "dest" the environment variable strings relevant to a SLURM batch
 * job allocation, overwriting any environment variables of the same name.
 * If the address pointed to by "dest" is NULL, memory will automatically be
 * xmalloc'ed. The array is terminated by a NULL pointer, and thus is
 * suitable for use by execle() and other env_array_* functions.
 *
 * Sets the variables:
 *	SLURM_JOB_ID
 *	SLURM_JOB_NUM_NODES
 *	SLURM_JOB_NODELIST
 *	SLURM_JOB_CPUS_PER_NODE
 *	ENVIRONMENT=BATCH
 *	HOSTNAME
 *	LOADLBATCH (AIX only)
 *
 * Sets OBSOLETE variables (needed for MPI, do not remove):
 *	SLURM_JOBID
 *	SLURM_NNODES
 *	SLURM_NODELIST
 *	SLURM_NTASKS
 *	SLURM_TASKS_PER_NODE
 */
extern int
env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
			const char *node_name)
{
	char *tmp = NULL;
	uint32_t num_nodes = 0;
	uint32_t num_cpus = 0;
	int i;
	slurm_step_layout_t *step_layout = NULL;
	uint32_t num_tasks = batch->ntasks;
	uint16_t cpus_per_task;
	uint16_t task_dist;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	_setup_particulars(cluster_flags, dest, batch->select_jobinfo);

	/* There is no explicit node count in the batch structure,
	 * so we need to calculate the node count. */
	for (i = 0; i < batch->num_cpu_groups; i++) {
		num_nodes += batch->cpu_count_reps[i];
		num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i];
	}

	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes);
	if (cluster_flags & CLUSTER_FLAG_BG)
		env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES",
					"%u", num_nodes);

	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s",
				batch->nodes);

	tmp = uint32_compressed_to_str(batch->num_cpu_groups,
				       batch->cpus_per_node,
				       batch->cpu_count_reps);
	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
	xfree(tmp);

	env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH");
	if (node_name)
		env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name);

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id);
	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes);
	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes);

	if ((batch->cpus_per_task != 0) &&
	    (batch->cpus_per_task != (uint16_t) NO_VAL))
		cpus_per_task = batch->cpus_per_task;
	else
		cpus_per_task = 1;	/* default value */
	if (cpus_per_task > 1) {
		env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u",
					cpus_per_task);
	}

	if (num_tasks) {
		env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u",
					num_tasks);
		/* keep around for old scripts */
		env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u",
					num_tasks);
	} else {
		num_tasks = num_cpus / cpus_per_task;
	}

	if ((tmp = getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) {
		task_dist = SLURM_DIST_ARBITRARY;
	} else {
		tmp = batch->nodes;
		task_dist = SLURM_DIST_BLOCK;
	}

	if (!(step_layout = slurm_step_layout_create(tmp,
						     batch->cpus_per_node,
						     batch->cpu_count_reps,
						     num_nodes,
						     num_tasks,
						     cpus_per_task,
						     task_dist,
						     (uint16_t)NO_VAL)))
		return SLURM_ERROR;

	tmp = _uint16_array_to_str(step_layout->node_cnt, step_layout->tasks);
	slurm_step_layout_destroy(step_layout);
	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
	xfree(tmp);

	return SLURM_SUCCESS;
}
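/*
 * Editor's worked example (standalone, illustrative values only) of the
 * node/cpu/task arithmetic performed above: with cpus_per_node = {4, 2},
 * cpu_count_reps = {3, 1} and cpus_per_task = 2, the batch structure
 * describes 4 nodes and 14 cpus, so the derived default task count is 7.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t cpus_per_node[] = { 4, 2 };
	uint32_t cpu_count_reps[] = { 3, 1 };
	uint16_t cpus_per_task = 2;
	uint32_t num_nodes = 0, num_cpus = 0, num_tasks;
	int i;

	for (i = 0; i < 2; i++) {
		num_nodes += cpu_count_reps[i];
		num_cpus  += cpu_count_reps[i] * cpus_per_node[i];
	}
	num_tasks = num_cpus / cpus_per_task;

	/* prints: nodes=4 cpus=14 tasks=7 */
	printf("nodes=%u cpus=%u tasks=%u\n", num_nodes, num_cpus, num_tasks);
	return 0;
}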
mpi_plugin_client_state_t *
p_mpi_hook_client_prelaunch(mpi_plugin_client_info_t *job, char ***env)
{
	struct sockaddr_in sin;
	pthread_attr_t attr;
	socklen_t len = sizeof(sin);
	short port1, port2;

	debug("Using mpi/mpich1_p4");
	if ((p4_fd1 = socket(PF_INET, SOCK_DGRAM, 0)) < 0) {
		error("socket: %m");
		return NULL;
	}
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = PF_INET;
	if (bind(p4_fd1, (struct sockaddr *) &sin, len) < 0) {
		error("bind: %m");
		return NULL;
	}
	if (getsockname(p4_fd1, (struct sockaddr *) &sin, &len) < 0) {
		error("getsockname: %m");
		return NULL;
	}
	port1 = ntohs(sin.sin_port);

	if ((p4_fd2 = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
		error("socket: %m");
		return NULL;
	}
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = PF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(p4_fd2, (struct sockaddr *) &sin, len) < 0) {
		error("bind: %m");
		return NULL;
	}
	if (listen(p4_fd2, 64) < 0)
		error("listen: %m");
	if (getsockname(p4_fd2, (struct sockaddr *) &sin, &len) < 0) {
		error("getsockname: %m");
		return NULL;
	}
	port2 = ntohs(sin.sin_port);

	if (pipe(shutdown_pipe) < 0) {
		error("pipe: %m");
		return (NULL);
	}
	shutdown_complete = false;
	shutdown_timeout = 5;
	slurm_mutex_init(&shutdown_lock);
	pthread_cond_init(&shutdown_cond, NULL);

	/* Process messages in a separate thread */
	slurm_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&p4_tid, &attr, &mpich1_thr, NULL)) {
		error("pthread_create: %m");
		slurm_attr_destroy(&attr);
		return NULL;
	}
	slurm_attr_destroy(&attr);

	env_array_overwrite_fmt(env, "SLURM_MPICH_PORT1", "%hu", port1);
	env_array_overwrite_fmt(env, "SLURM_MPICH_PORT2", "%hu", port2);
	debug("mpich_p4 plugin listening on fd=%d,%d ports=%d,%d",
	      p4_fd1, p4_fd2, port1, port2);

	/* only return NULL on error */
	return (void *)0xdeadbeef;
}
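/*
 * Editor's minimal standalone sketch of the technique used above to obtain
 * SLURM_MPICH_PORT1/PORT2: bind a socket to port 0 so the kernel picks a
 * free ephemeral port, then read the chosen port back with getsockname().
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in sin;
	socklen_t len = sizeof(sin);
	int fd;

	if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		return 1;
	}
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(0);		/* let the kernel choose */
	if (bind(fd, (struct sockaddr *) &sin, len) < 0) {
		perror("bind");
		return 1;
	}
	if (getsockname(fd, (struct sockaddr *) &sin, &len) < 0) {
		perror("getsockname");
		return 1;
	}
	printf("kernel assigned port %hu\n", ntohs(sin.sin_port));
	close(fd);
	return 0;
}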
static int _exec_srun_single(spawn_req_t *req, char **env)
{
	int argc, i, j;
	char **argv = NULL;
	spawn_subcmd_t *subcmd;

	debug3("mpi/pmi2: in _exec_srun_single");
	subcmd = req->subcmds[0];
	argc = subcmd->argc + 7;
	xrealloc(argv, (argc + 1) * sizeof(char *));

	j = 0;
	argv[j++] = "srun";
	argv[j++] = "--mpi=pmi2";
	if (job_info.srun_opt && job_info.srun_opt->no_alloc) {
		argv[j++] = "--no-alloc";
		xstrfmtcat(argv[j++], "--nodelist=%s",
			   job_info.srun_opt->nodelist);
	}
	xstrfmtcat(argv[j++], "--ntasks=%d", subcmd->max_procs);
	/* TODO: inherit options from srun_opt. */

	for (i = 0; i < subcmd->info_cnt; i++) {
		if (!strcmp(subcmd->info_keys[i], "host")) {
			xstrfmtcat(argv[j++], "--nodelist=%s",
				   subcmd->info_vals[i]);
		} else if (!strcmp(subcmd->info_keys[i], "arch")) {
			error("mpi/pmi2: spawn info key 'arch' not supported");
		} else if (!strcmp(subcmd->info_keys[i], "wdir")) {
			xstrfmtcat(argv[j++], "--chdir=%s",
				   subcmd->info_vals[i]);
		} else if (!strcmp(subcmd->info_keys[i], "path")) {
			env_array_overwrite_fmt(&env, "PATH", "%s",
						subcmd->info_vals[i]);
		} else if (!strcmp(subcmd->info_keys[i], "file")) {
			error("mpi/pmi2: spawn info key 'file' not supported");
		} else if (!strcmp(subcmd->info_keys[i], "soft")) {
			error("mpi/pmi2: spawn info key 'soft' not supported");
		} else {
			error("mpi/pmi2: unknown spawn info key '%s' ignored",
			      subcmd->info_keys[i]);
		}
	}

	argv[j++] = subcmd->cmd;
	for (i = 0; i < subcmd->argc; i++) {
		argv[j++] = subcmd->argv[i];
	}
	argv[j++] = NULL;

	{
		debug3("mpi/pmi2: to execve");
		for (i = 0; i < j; i++) {
			debug3("mpi/pmi2: argv[%d]=%s", i, argv[i]);
		}
	}

	execve(SLURM_PREFIX"/bin/srun", argv, env);
	error("mpi/pmi2: failed to exec srun: %m");
	return SLURM_ERROR;
}
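/*
 * Editor's illustrative sketch only: for a hypothetical single-command
 * spawn request with max_procs = 4, cmd = "a.out" and one argument "-v"
 * (and no special info keys), the loop above builds an argv equivalent to
 * the one below before execve()ing srun with it.
 */
#include <stdio.h>

int main(void)
{
	const char *argv_example[] = {
		"srun", "--mpi=pmi2", "--ntasks=4", "a.out", "-v", NULL
	};
	int i;

	for (i = 0; argv_example[i]; i++)
		printf("argv[%d]=%s\n", i, argv_example[i]);
	return 0;
}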