/*
 * Set up the key-value store on the srun side.
 *
 * Only the temporary KVS is initialized here; `job` is not referenced.
 *
 * RET whatever temp_kvs_init() returns
 */
static int _setup_srun_kvs(const mpi_plugin_client_info_t *job)
{
	return temp_kvs_init();
}
static int _setup_stepd_kvs(const stepd_step_rec_t *job, char ***env) { int rc = SLURM_SUCCESS, i = 0, pp_cnt = 0; char *p, env_key[32], *ppkey, *ppval; kvs_seq = 1; rc = temp_kvs_init(); if (rc != SLURM_SUCCESS) return rc; rc = kvs_init(); if (rc != SLURM_SUCCESS) return rc; /* preput */ p = getenvp(*env, PMI2_PREPUT_CNT_ENV); if (p) { pp_cnt = atoi(p); } for (i = 0; i < pp_cnt; i ++) { snprintf(env_key, 32, PMI2_PPKEY_ENV"%d", i); p = getenvp(*env, env_key); ppkey = p; /* getenvp will not modify p */ snprintf(env_key, 32, PMI2_PPVAL_ENV"%d", i); p = getenvp(*env, env_key); ppval = p; kvs_put(ppkey, ppval); } /* * For PMI11. * A better logic would be to put PMI_process_mapping in KVS only if * the task distribution method is not "arbitrary", because in * "arbitrary" distribution the process mapping varible is not correct. * MPICH2 may deduce the clique info from the hostnames. But that * is rather costly. */ kvs_put("PMI_process_mapping", job_info.proc_mapping); return SLURM_SUCCESS; }
extern int temp_kvs_send(void) { int rc = SLURM_ERROR, retry = 0; unsigned int delay = 1; char *nodelist = NULL; if (!in_stepd()) /* srun */ nodelist = xstrdup(job_info.step_nodelist); else if (tree_info.parent_node) nodelist = xstrdup(tree_info.parent_node); /* cmd included in temp_kvs_buf */ kvs_seq++; /* expecting new kvs after now */ while (1) { if (retry == 1) verbose("failed to send temp kvs, rc=%d, retrying", rc); if (nodelist) /* srun or non-first-level stepds */ rc = slurm_forward_data(&nodelist, tree_sock_addr, temp_kvs_cnt, temp_kvs_buf); else /* first level stepds */ rc = tree_msg_to_srun(temp_kvs_cnt, temp_kvs_buf); if (rc == SLURM_SUCCESS) break; if (++retry >= MAX_RETRIES) break; /* wait, in case parent stepd / srun not ready */ sleep(delay); delay *= 2; } temp_kvs_init(); /* clear old temp kvs */ xfree(nodelist); return rc; }
extern int temp_kvs_send(void) { int rc = SLURM_ERROR, retry = 0; unsigned int delay = 1; /* cmd included in temp_kvs_buf */ kvs_seq ++; /* expecting new kvs after now */ while (1) { if (retry == 1) { verbose("failed to send temp kvs, rc=%d, retrying", rc); } if (! in_stepd()) { /* srun */ rc = tree_msg_to_stepds(job_info.step_nodelist, temp_kvs_cnt, temp_kvs_buf); } else if (tree_info.parent_node != NULL) { /* non-first-level stepds */ rc = tree_msg_to_stepds(tree_info.parent_node, temp_kvs_cnt, temp_kvs_buf); } else { /* first level stepds */ rc = tree_msg_to_srun(temp_kvs_cnt, temp_kvs_buf); } if (rc == SLURM_SUCCESS) break; retry ++; if (retry >= MAX_RETRIES) break; /* wait, in case parent stepd / srun not ready */ sleep(delay); delay *= 2; } temp_kvs_init(); /* clear old temp kvs */ return rc; }