/* gasneti_bootstrapFini */
/*
 * Shut down the PMI layer used for bootstrapping.
 *
 * PMI2 builds finalize unconditionally.  PMI-1 builds finalize only when
 * the recorded gasneti_pmi_initialized flag equals PMI_FALSE.
 * NOTE(review): presumably that flag records whether PMI was already
 * running before this library started it -- confirm against the init path.
 */
void gasneti_bootstrapFini_pmi(void)
{
#if !USE_PMI2_API
    if (gasneti_pmi_initialized == PMI_FALSE) {
        PMI_Finalize();
    }
#else
    PMI2_Finalize();
#endif
}
/*
 * Drop one reference on the shared PMI layer.
 *
 * Does nothing when the reference count is already zero.  Otherwise the
 * count is decremented, and the PMI library is finalized once it reaches
 * zero (PMI2 or PMI-1 depending on WANT_PMI2_SUPPORT).
 */
void mca_common_pmi_finalize (void)
{
    /* Never initialized, or already fully released. */
    if (mca_common_pmi_init_count == 0) {
        return;
    }

    mca_common_pmi_init_count--;
    if (mca_common_pmi_init_count != 0) {
        /* Other users still hold a reference. */
        return;
    }

#if WANT_PMI2_SUPPORT
    PMI2_Finalize ();
#else
    PMI_Finalize ();
#endif
}
/*
 * Drop one reference on the shared PMI layer.
 *
 * Does nothing when the reference count is already zero.  When the last
 * reference goes away, the PMI library is finalized: PMI2_Finalize() if
 * PMI2 support is compiled in and mca_common_pmi_version is 2,
 * PMI_Finalize() otherwise.
 */
void mca_common_pmi_finalize (void)
{
    /* Never initialized, or already fully released. */
    if (mca_common_pmi_init_count == 0) {
        return;
    }

    mca_common_pmi_init_count--;
    if (mca_common_pmi_init_count != 0) {
        /* Other users still hold a reference. */
        return;
    }

#if WANT_PMI2_SUPPORT
    if (2 == mca_common_pmi_version) {
        PMI2_Finalize ();
        return;
    }
#endif
    PMI_Finalize ();
}
/*
 * Release one reference on the PMI2-backed component.
 *
 * Returns OPAL_SUCCESS in all cases.  When the reference count is already
 * zero nothing is done.  Otherwise the count is decremented,
 * PMI2_Finalize() is called once it reaches zero, and the cached KVS name
 * and local-rank array are released.
 *
 * Both pointers are reset to NULL after being freed so that a later call
 * (or a later re-initialization) never sees a dangling pointer; previously
 * s2_lranks was left pointing at freed memory and would be freed again on
 * the next call.
 */
static int s2_fini(void)
{
    if (0 == pmix_init_count) {
        return OPAL_SUCCESS;
    }

    if (0 == --pmix_init_count) {
        PMI2_Finalize();
    }

    /* free(NULL) is a no-op, so no guards are needed. */
    free(pmix_kvs_name);
    pmix_kvs_name = NULL;

    free(s2_lranks);
    s2_lranks = NULL;

    return OPAL_SUCCESS;
}
int orte_grpcomm_pmi_close(void) { #if WANT_CRAY_PMI2_EXT if (started_by_me && PMI2_Initialized()) { PMI2_Finalize(); } #else PMI_BOOL initialized; /* if we weren't selected, cleanup */ if (started_by_me && PMI_SUCCESS == PMI_Initialized(&initialized) && PMI_TRUE == initialized) { PMI_Finalize(); } #endif return ORTE_SUCCESS; }
/*
 * Small PMI2 test driver: initializes PMI2, performs one PMIX_Ring
 * exchange publishing "pmi_rank=<rank>", prints what it received from its
 * ring neighbours, finalizes PMI2 and prints the elapsed wall time in
 * milliseconds.
 *
 * All values that the PMI calls are expected to fill in are given defined
 * initial contents, so the printf calls below never read indeterminate
 * data even if a PMI call fails without writing its outputs.
 */
int main(int argc, char **argv)
{
    int spawned = 0, size = 0, rank = -1, appnum = -1;
    struct timeval tv, tv2;
    int ring_rank = -1, ring_size = 0;
    char val[128];
    char left[128] = "";
    char right[128] = "";

    (void)argc;
    (void)argv;

    /* Debugger hook: set x to non-zero in a debugger to park the process. */
    {
        int x = 0;
        while (x) {
            fprintf(stderr, "attachme %d\n", (int)getpid());
            sleep(2);
        }
    }

    gettimeofday(&tv, NULL);

    PMI2_Init(&spawned, &size, &rank, &appnum);

    /* test PMIX_Ring */
    snprintf(val, sizeof(val), "pmi_rank=%d", rank);
    PMIX_Ring(val, &ring_rank, &ring_size, left, right, (int)sizeof(left));

    printf("pmi_rank:%d ring_rank:%d ring_size:%d left:%s mine:%s right:%s\n",
           rank, ring_rank, ring_size, left, val, right);

    PMI2_Finalize();

    gettimeofday(&tv2, NULL);
    printf("%f\n",
           ((tv2.tv_sec - tv.tv_sec) * 1000.0
            + (tv2.tv_usec - tv.tv_usec) / 1000.0));

    return 0;
}
int main(int argc, char **argv) { int ret = 0; int rc; char *str = NULL; int ti = (argc > 1 ? atoi(argv[1]) : 0); srand(time(NULL)); str = getenv("VERBOSE"); _verbose = (str ? atoi(str) : _verbose); str = getenv("LEGACY"); _legacy = (str ? atoi(str) : _legacy); spawned = random_value(10, 20); size = random_value(10, 20); rank = random_value(10, 20); appnum = random_value(10, 20); if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { log_fatal("PMI2_Init failed: %d\n", rc); return rc; } if (!ti || 1 == ti) { rc = test_item1(); ret += (rc ? 1 : 0); log_info("TI1 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 2 == ti) { rc = test_item2(); ret += (rc ? 1 : 0); log_info("TI2 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 3 == ti) { rc = test_item3(); ret += (rc ? 1 : 0); log_info("TI3 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 4 == ti) { rc = test_item4(); ret += (rc ? 1 : 0); log_info("TI4 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 5 == ti) { rc = test_item5(); ret += (rc ? 1 : 0); log_info("TI5 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 6 == ti) { rc = test_item6(); ret += (rc ? 1 : 0); log_info("TI6 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 7 == ti) { rc = test_item7(); ret += (rc ? 1 : 0); log_info("TI7 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 8 == ti) { rc = test_item8(); ret += (rc ? 1 : 0); log_info("TI8 : %s\n", (rc ? "FAIL" : "PASS")); } if (!ti || 9 == ti) { rc = test_item9(); ret += (rc ? 1 : 0); log_info("TI9 : %s\n", (rc ? "FAIL" : "PASS")); } if (PMI2_SUCCESS != (rc = PMI2_Finalize())) { log_fatal("PMI2_Finalize failed: %d\n", rc); return rc; } return ret; }
int main(int argc, char **argv) { int rank; int size; int appnum; int spawned; int flag; int len; int i; struct timeval tv; struct timeval tv2; char jobid[128]; char key[128]; char val[128]; char buf[128]; { int x = 1; while (x == 0) { sleep(2); } } gettimeofday(&tv, NULL); srand(tv.tv_sec); PMI2_Init(&spawned, &size, &rank, &appnum); PMI2_Job_GetId(jobid, sizeof(buf)); memset(val, 0, sizeof(val)); PMI2_Info_GetJobAttr("mpi_reserved_ports", val, PMI2_MAX_ATTRVALUE, &flag); sprintf(key, "mpi_reserved_ports"); PMI2_KVS_Put(key, val); memset(val, 0, sizeof(val)); sprintf(buf, "PMI_netinfo_of_task"); PMI2_Info_GetJobAttr(buf, val, PMI2_MAX_ATTRVALUE, &flag); sprintf(key, buf); PMI2_KVS_Put(key, val); memset(val, 0, sizeof(val)); sprintf(key, "david@%d", rank); sprintf(val, "%s", mrand(97, 122)); PMI2_KVS_Put(key, val); PMI2_KVS_Fence(); for (i = 0; i < size; i++) { memset(val, 0, sizeof(val)); sprintf(key, "PMI_netinfo_of_task"); PMI2_KVS_Get(jobid, PMI2_ID_NULL, key, val, sizeof(val), &len); printf("rank: %d key:%s val:%s\n", rank, key, val); memset(val, 0, sizeof(val)); sprintf(key, "david@%d", rank); PMI2_KVS_Get(jobid, PMI2_ID_NULL, key, val, sizeof(val), &len); printf("rank: %d key:%s val:%s\n", rank, key, val); memset(val, 0, sizeof(val)); sprintf(key, "mpi_reserved_ports"); PMI2_KVS_Get(jobid, PMI2_ID_NULL, key, val, sizeof(val), &len); printf("rank: %d key:%s val:%s\n", rank, key, val); } PMI2_Finalize(); gettimeofday(&tv2, NULL); printf("%f\n", ((tv2.tv_sec - tv.tv_sec) * 1000.0 + (tv2.tv_usec - tv.tv_usec) / 1000.0)); return 0; }
/*
 * Shut down the PMI2 layer.  The status returned by PMI2_Finalize() is
 * deliberately not examined.
 */
void pmi_fini()
{
    (void)PMI2_Finalize();
}
static int cray_init(void) { int i, spawned, size, rank, appnum, my_node; int rc, ret = OPAL_ERROR; char *pmapping = NULL; char buf[PMI2_MAX_ATTRVALUE]; int found; uint32_t jobfam; ++pmix_init_count; /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { return OPAL_SUCCESS; } size = -1; rank = -1; appnum = -1; if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc); return OPAL_ERROR; } if( size < 0 || rank < 0 ){ opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true); goto err_exit; } pmix_size = size; pmix_rank = rank; pmix_appnum = appnum; pmix_vallen_max = PMI2_MAX_VALLEN; pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility? pmix_keylen_max = PMI2_MAX_KEYLEN; rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); goto err_exit; } pmix_usize = atoi(buf); pmix_kvs_name = (char*)malloc(pmix_kvslen_max); if( pmix_kvs_name == NULL ){ PMI2_Finalize(); ret = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); goto err_exit; } rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam); if (rc != 1) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); rc = OPAL_ERROR; goto err_exit; } pmix_jobid = jobfam << 16; /* store our name in the opal_proc_t so that * debug messages will make sense - an upper * layer will eventually overwrite it, but that * won't do any harm */ pmix_pname.jid = pmix_jobid; pmix_pname.vid = pmix_rank; opal_proc_set_name((opal_process_name_t*)&pmix_pname); opal_output_verbose(10, opal_pmix_base_framework.framework_output, "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s", OPAL_NAME_PRINT(*(opal_process_name_t*)&pmix_pname),pmix_pname.jid,pmix_pname.vid,pmix_kvs_name); pmapping = (char*)malloc(PMI2_MAX_VALLEN); if( pmapping == NULL ){ rc 
= OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); if( !found || PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr"); return OPAL_ERROR; } pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks); if (NULL == pmix_lranks) { rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } free(pmapping); /* find ourselves */ for (i=0; i < pmix_nlranks; i++) { if (pmix_rank == pmix_lranks[i]) { pmix_lrank = i; pmix_nrank = my_node; break; } } return OPAL_SUCCESS; err_exit: PMI2_Finalize(); return ret; }
/*
 * Runtime teardown: finalize PMI2.  The status from PMI2_Finalize() is
 * not examined; this routine always returns 0.
 */
int shmem_runtime_fini(void)
{
    (void)PMI2_Finalize();

    return 0;
}
/*
 * Out-of-band teardown: finalize PMI2, but only if the library reports
 * that it is currently initialized.  The finalize result is checked by
 * PMI_CHK.
 */
void chpl_comm_ofi_oob_fini(void)
{
    if (PMI2_Initialized() != PMI_TRUE) {
        return;
    }

    DBG_PRINTF(DBG_OOB, "OOB finalize");
    PMI_CHK(PMI2_Finalize());
}
static int s2_init(void) { int spawned, size, rank, appnum; int rc, ret = OPAL_ERROR; char buf[16]; int found; int my_node; char *tmp; uint32_t jobfam, stepid; int i; /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { return OPAL_SUCCESS; } size = -1; rank = -1; appnum = -1; if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc); return OPAL_ERROR; } if( size < 0 || rank < 0 ){ opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true); goto err_exit; } s2_jsize = size; s2_rank = rank; s2_appnum = appnum; pmix_vallen_max = PMI2_MAX_VALLEN; pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility? pmix_keylen_max = PMI2_MAX_KEYLEN; pmix_vallen_threshold = PMI2_MAX_VALLEN * 3; pmix_vallen_threshold >>= 2; rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); if( PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); goto err_exit; } s2_usize = atoi(buf); pmix_kvs_name = (char*)malloc(pmix_kvslen_max); if( pmix_kvs_name == NULL ){ PMI2_Finalize(); ret = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max); if( PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); goto err_exit; } /* Slurm PMI provides the job id as an integer followed * by a '.', followed by essentially a stepid. The first integer * defines an overall job number. The second integer is the number of * individual jobs we have run within that allocation. So we translate * this as the overall job number equating to our job family, and * the individual number equating to our local jobid */ jobfam = strtoul(pmix_kvs_name, &tmp, 10); if (NULL == tmp) { /* hmmm - no '.', so let's just use zero */ stepid = 0; } else { tmp++; /* step over the '.' 
*/ stepid = strtoul(tmp, NULL, 10); } /* now build the jobid */ s2_jobid = (jobfam << 16) | stepid; /* store our name in the opal_proc_t so that * debug messages will make sense - an upper * layer will eventually overwrite it, but that * won't do any harm */ s2_pname.jobid = s2_jobid; s2_pname.vpid = s2_rank; opal_proc_set_name(&s2_pname); opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s2: assigned tmp name", OPAL_NAME_PRINT(s2_pname)); char *pmapping = (char*)malloc(PMI2_MAX_VALLEN); if( pmapping == NULL ){ rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); if( !found || PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr"); return OPAL_ERROR; } s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vpid, &my_node, &s2_nlranks); if (NULL == s2_lranks) { rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } free(pmapping); /* find ourselves */ for (i=0; i < s2_nlranks; i++) { if (s2_rank == s2_lranks[i]) { s2_lrank = i; s2_nrank = i; break; } } /* increment the init count */ ++pmix_init_count; return OPAL_SUCCESS; err_exit: PMI2_Finalize(); return ret; }
static int mca_initialize_pmi_v2(void) { int spawned, size, rank, appnum; int rc, ret = OPAL_ERROR; /* deal with a Slurm bug by first checking if we were * even launched by a PMI server before attempting * to use PMI */ if (NULL == getenv("PMI_FD")) { return OPAL_ERROR; } /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { return OPAL_SUCCESS; } size = -1; rank = -1; appnum = -1; if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { opal_show_help("help-common-pmi.txt", "pmi2-init-failed", true, rc); return OPAL_ERROR; } if( size < 0 || rank < 0 ){ opal_output(0, "SIZE %d RANK %d", size, rank); opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true); goto err_exit; } pmi_size = size; pmi_rank = rank; pmi_appnum = appnum; pmi_vallen_max = PMI2_MAX_VALLEN; pmi_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility? pmi_keylen_max = PMI2_MAX_KEYLEN; char buf[16]; int found; rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); if( PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); goto err_exit; } pmi_usize = atoi(buf); pmi_kvs_name = (char*)malloc(pmi_kvslen_max); if( pmi_kvs_name == NULL ){ PMI2_Finalize(); ret = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } rc = PMI2_Job_GetId(pmi_kvs_name, pmi_kvslen_max); if( PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); goto err_exit; } return OPAL_SUCCESS; err_exit: PMI2_Finalize(); return ret; }
/*@ MPIDI_PG_Finalize - Finalize the process groups, including freeing all process group structures @*/ int MPIDI_PG_Finalize(void) { int mpi_errno = MPI_SUCCESS; MPIDI_PG_t *pg, *pgNext; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_FINALIZE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_FINALIZE); /* Print the state of the process groups */ if (verbose) { MPIU_PG_Printall( stdout ); } /* FIXME - straighten out the use of PMI_Finalize - no use after PG_Finalize */ if (pg_world->connData) { #ifdef USE_PMI2_API mpi_errno = PMI2_Finalize(); if (mpi_errno) MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|pmi_finalize"); #else int rc; rc = PMI_Finalize(); if (rc) { MPIR_ERR_SET1(mpi_errno,MPI_ERR_OTHER, "**ch3|pmi_finalize", "**ch3|pmi_finalize %d", rc); } #endif } /* Free the storage associated with the process groups */ pg = MPIDI_PG_list; while (pg) { pgNext = pg->next; /* In finalize, we free all process group information, even if the ref count is not zero. This can happen if the user fails to use MPI_Comm_disconnect on communicators that were created with the dynamic process routines.*/ /* XXX DJG FIXME-MT should we be checking this? */ if (MPIR_Object_get_ref(pg) == 0 || 1) { if (pg == MPIDI_Process.my_pg) MPIDI_Process.my_pg = NULL; MPIR_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */ MPIDI_PG_Destroy( pg ); } pg = pgNext; } /* If COMM_WORLD is still around (it normally should be), try to free it here. The reason that we need to free it at this point is that comm_world (and comm_self) still exist, and hence the usual process to free the related VC structures will not be invoked. */ if (MPIDI_Process.my_pg) { MPIDI_PG_Destroy(MPIDI_Process.my_pg); } MPIDI_Process.my_pg = NULL; /* ifdefing out this check because the list will not be NULL in Ch3_finalize because one additional reference is retained in MPIDI_Process.my_pg. That reference is released only after ch3_finalize returns. If I release it before ch3_finalize, the ssm channel crashes. 
*/ #if 0 if (MPIDI_PG_list != NULL) { /* --BEGIN ERROR HANDLING-- */ mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_INTERN, "**dev|pg_finalize|list_not_empty", NULL); /* --END ERROR HANDLING-- */ } #endif MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_FINALIZE); return mpi_errno; }
static int cray_init(void) { int i, spawned, size, rank, appnum, my_node; int rc, ret = OPAL_ERROR; char *pmapping = NULL; char buf[PMI2_MAX_ATTRVALUE]; int found; int major, minor, revision; uint32_t jobfam; opal_value_t kv; opal_process_name_t ldr; char nmtmp[64]; char *str, **localranks = NULL; ++pmix_init_count; /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { opal_output_verbose(10, opal_pmix_base_framework.framework_output, "%s pmix:cray: pmi already initialized", OPAL_NAME_PRINT(pmix_pname)); return OPAL_SUCCESS; } size = -1; rank = -1; appnum = -1; if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc); return OPAL_ERROR; } if( size < 0 || rank < 0 ){ opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true); goto err_exit; } pmix_size = size; pmix_rank = rank; pmix_appnum = appnum; pmix_vallen_max = PMI2_MAX_VALLEN; pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility? 
pmix_keylen_max = PMI2_MAX_KEYLEN; pmix_vallen_threshold = PMI2_MAX_VALLEN * 3; pmix_vallen_threshold >>= 2; /* * get the version info */ if (PMI_SUCCESS != PMI_Get_version_info(&major,&minor,&revision)) { return OPAL_ERROR; } snprintf(cray_pmi_version, sizeof(cray_pmi_version), "%d.%d.%d", major, minor, revision); pmix_kvs_name = (char*)malloc(pmix_kvslen_max); if( pmix_kvs_name == NULL ){ PMI2_Finalize(); ret = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); goto err_exit; } rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam); if (rc != 1) { opal_output_verbose(10, opal_pmix_base_framework.framework_output, "%s pmix:cray: pmix_kvs_name %s", OPAL_NAME_PRINT(pmix_pname), pmix_kvs_name); rc = OPAL_ERROR; goto err_exit; } pmix_jobid = jobfam << 16; /* store our name in the opal_proc_t so that * debug messages will make sense - an upper * layer will eventually overwrite it, but that * won't do any harm */ pmix_pname.jobid = pmix_jobid; pmix_pname.vpid = pmix_rank; opal_proc_set_name(&pmix_pname); opal_output_verbose(10, opal_pmix_base_framework.framework_output, "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s", OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid,pmix_kvs_name); pmapping = (char*)malloc(PMI2_MAX_VALLEN); if( pmapping == NULL ){ rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); if( !found || PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr"); return OPAL_ERROR; } pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks); if (NULL == pmix_lranks) { rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } free(pmapping); // setup hash table opal_pmix_base_hash_init(); /* save the job size */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_JOB_SIZE); kv.type = OPAL_UINT32; 
kv.data.uint32 = pmix_size; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(rc); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* save the appnum */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_APPNUM); kv.type = OPAL_UINT32; kv.data.uint32 = pmix_appnum; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); goto err_exit; } pmix_usize = atoi(buf); OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_UNIV_SIZE); kv.type = OPAL_UINT32; kv.data.uint32 = pmix_usize; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(rc); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_JOBID); kv.type = OPAL_UINT32; kv.data.uint32 = pmix_jobid; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* save the local size */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCAL_SIZE); kv.type = OPAL_UINT16; kv.data.uint16 = pmix_nlranks; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(rc); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); ldr.vpid = pmix_lranks[0]; ldr.jobid = pmix_pname.jobid; /* find ourselves and build up a string for local peer info */ memset(nmtmp, 0, 64); for (i=0; i < pmix_nlranks; i++) { ret = snprintf(nmtmp, 64, "%d", pmix_lranks[i]); opal_argv_append_nosize(&localranks, nmtmp); if (pmix_rank == pmix_lranks[i]) { pmix_lrank = i; pmix_nrank = i; } } str = opal_argv_join(localranks, ','); opal_argv_free(localranks); OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCAL_PEERS); kv.type = 
OPAL_STRING; kv.data.string = str; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* save the local leader */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCALLDR); kv.type = OPAL_UINT64; kv.data.uint64 = *(uint64_t*)&ldr; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } /* save our local rank */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCAL_RANK); kv.type = OPAL_UINT16; kv.data.uint16 = pmix_lrank; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } /* and our node rank */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_NODE_RANK); kv.type = OPAL_UINT16; kv.data.uint16 = pmix_nrank; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); return OPAL_SUCCESS; err_exit: PMI2_Finalize(); return ret; }
static int s2_init(void) { int spawned, size, rank, appnum; int rc, ret = OPAL_ERROR; char buf[16]; int found; int my_node; uint32_t stepid; int i; opal_process_name_t ldr; opal_value_t kv; char **localranks; char *str; char nmtmp[64]; opal_process_name_t wildcard_rank; /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { return OPAL_SUCCESS; } size = -1; rank = -1; appnum = -1; // setup hash table so we always can finalize it opal_pmix_base_hash_init(); if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc); return OPAL_ERROR; } if( size < 0 || rank < 0 ){ opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true); goto err_exit; } s2_jsize = size; s2_rank = rank; s2_appnum = appnum; pmix_vallen_max = PMI2_MAX_VALLEN; pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility? pmix_keylen_max = PMI2_MAX_KEYLEN; pmix_vallen_threshold = PMI2_MAX_VALLEN * 3; pmix_vallen_threshold >>= 2; pmix_kvs_name = (char*)malloc(pmix_kvslen_max); if( pmix_kvs_name == NULL ){ PMI2_Finalize(); ret = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max); if( PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); free(pmix_kvs_name); goto err_exit; } /* store our name in the opal_proc_t so that * debug messages will make sense - an upper * layer will eventually overwrite it, but that * won't do any harm */ s2_pname.jobid = strtoul(pmix_kvs_name, &str, 10); s2_pname.jobid = (s2_pname.jobid << 16) & 0xffff0000; if (NULL != str) { stepid = strtoul(str, NULL, 10); s2_pname.jobid |= (stepid & 0x0000ffff); } s2_pname.vpid = s2_rank; opal_proc_set_name(&s2_pname); opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s2: assigned tmp name", OPAL_NAME_PRINT(s2_pname)); /* setup wildcard rank*/ wildcard_rank = OPAL_PROC_MY_NAME; wildcard_rank.vpid = OPAL_VPID_WILDCARD; /* Slurm 
PMI provides the job id as an integer followed * by a '.', followed by essentially a stepid. The first integer * defines an overall job number. The second integer is the number of * individual jobs we have run within that allocation. */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_JOBID); kv.type = OPAL_UINT32; kv.data.uint32 = s2_pname.jobid; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); // frees pmix_kvs_name /* save the job size */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_JOB_SIZE); kv.type = OPAL_UINT32; kv.data.uint32 = size; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) { OPAL_ERROR_LOG(rc); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* save the appnum */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_APPNUM); kv.type = OPAL_UINT32; kv.data.uint32 = appnum; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); if( PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); goto err_exit; } /* save it */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_UNIV_SIZE); kv.type = OPAL_UINT32; kv.data.uint32 = atoi(buf); if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) { OPAL_ERROR_LOG(rc); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* push this into the dstore for subsequent fetches */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_MAX_PROCS); kv.type = OPAL_UINT32; kv.data.uint32 = atoi(buf); if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); char *pmapping = (char*)malloc(PMI2_MAX_VALLEN); if( pmapping == NULL ){ rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); 
return rc; } rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); if( !found || PMI2_SUCCESS != rc ) { OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr"); return OPAL_ERROR; } s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vpid, &my_node, &s2_nlranks); if (NULL == s2_lranks) { rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } free(pmapping); /* save the local size */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCAL_SIZE); kv.type = OPAL_UINT32; kv.data.uint32 = s2_nlranks; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard_rank, &kv))) { OPAL_ERROR_LOG(rc); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); s2_lrank = 0; s2_nrank = 0; ldr.vpid = rank; localranks = NULL; if (0 < s2_nlranks && NULL != s2_lranks) { /* note the local ldr */ ldr.vpid = s2_lranks[0]; /* find ourselves */ ldr.jobid = s2_pname.jobid; ldr.vpid = s2_pname.vpid; memset(nmtmp, 0, 64); for (i=0; i < s2_nlranks; i++) { (void)snprintf(nmtmp, 64, "%d", s2_lranks[i]); opal_argv_append_nosize(&localranks, nmtmp); if (s2_rank == s2_lranks[i]) { s2_lrank = i; s2_nrank = i; } } str = opal_argv_join(localranks, ','); opal_argv_free(localranks); OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCAL_PEERS); kv.type = OPAL_STRING; kv.data.string = str; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&wildcard_rank, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); } /* save the local leader */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCALLDR); kv.type = OPAL_UINT64; kv.data.uint64 = *(uint64_t*)&ldr; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* save our local rank */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCAL_RANK); kv.type = OPAL_UINT16; kv.data.uint16 = s2_lrank; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, 
&kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* and our node rank */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_NODE_RANK); kv.type = OPAL_UINT16; kv.data.uint16 = s2_nrank; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { OPAL_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); goto err_exit; } OBJ_DESTRUCT(&kv); /* increment the init count */ ++pmix_init_count; return OPAL_SUCCESS; err_exit: PMI2_Finalize(); return ret; }