int main(int argc, char **argv, char **envp)
{
    int pmi_rank = -1;
    int pmi_process_group_size = -1;
    int rc = EXIT_SUCCESS;
    char *err = NULL;
    PMI_BOOL pmi_initialized = PMI_FALSE;
    int i;
    double pi;
    int spawned;

    /* optional abort code from the command line (default: 3) */
    if (1 < argc) {
        rc = strtol(argv[1], NULL, 10);
    } else {
        rc = 3;
    }
    /* sanity: PMI must not report itself initialized before PMI_Init */
    if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized) ||
        PMI_TRUE == pmi_initialized) {
        fprintf(stderr, "=== ERROR: PMI sanity failure\n");
        return EXIT_FAILURE;
    }
    if (PMI_SUCCESS != PMI_Init(&spawned)) {
        err = "PMI_Init failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_size(&pmi_process_group_size)) {
        err = "PMI_Get_size failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_rank(&pmi_rank)) {
        err = "PMI_Get_rank failure!";
        goto done;
    }
    /* spin doing busy-work until one designated rank calls PMI_Abort */
    i = 0;
    while (1) {
        i++;
        pi = i / 3.14159265;
        if (i > 10000) i = 0;
        if ((pmi_rank == 3 ||
             (pmi_process_group_size <= 3 && pmi_rank == 0)) && i == 9995) {
            asprintf(&err, "RANK%d CALLED ABORT", pmi_rank);
            fprintf(stderr, "%s\n", err);
            fflush(stderr);
            PMI_Abort(rc, err);
        }
    }

done:
    if (NULL != err) {
        fprintf(stderr, "=== ERROR [rank:%d] %s\n", pmi_rank, err);
        rc = EXIT_FAILURE;
    }
    return rc;
}
static int reinit_pmi(void)
{
    int ret;
    int has_parent = 0;
    int pg_rank, pg_size;
    int kvs_name_sz, pg_id_sz;

    MPIDI_STATE_DECL(MPID_STATE_REINIT_PMI);
    MPIDI_FUNC_ENTER(MPID_STATE_REINIT_PMI);

    /* Init PMI and do some sanity checks */
    ret = PMI_Init(&has_parent);
    CHECK_ERR(ret, "pmi_init");

    ret = PMI_Get_rank(&pg_rank);
    CHECK_ERR(ret, "pmi_get_rank");

    ret = PMI_Get_size(&pg_size);
    CHECK_ERR(ret, "pmi_get_size");

    CHECK_ERR(pg_size != MPIDI_Process.my_pg->size, "pg size differs after restart");
    CHECK_ERR(pg_rank != MPIDI_Process.my_pg_rank, "pg rank differs after restart");

    /* get new pg_id */
    ret = PMI_KVS_Get_name_length_max(&pg_id_sz);
    CHECK_ERR(ret, "pmi_get_id_length_max");

    MPIU_Free(MPIDI_Process.my_pg->id);
    MPIDI_Process.my_pg->id = MPIU_Malloc(pg_id_sz + 1);
    CHECK_ERR(MPIDI_Process.my_pg->id == NULL, "malloc failed");

    ret = PMI_KVS_Get_my_name(MPIDI_Process.my_pg->id, pg_id_sz);
    CHECK_ERR(ret, "pmi_kvs_get_my_name");

    /* get new kvsname */
    ret = PMI_KVS_Get_name_length_max(&kvs_name_sz);
    CHECK_ERR(ret, "PMI_KVS_Get_name_length_max");

    MPIU_Free(MPIDI_Process.my_pg->connData);
    MPIDI_Process.my_pg->connData = MPIU_Malloc(kvs_name_sz + 1);
    CHECK_ERR(MPIDI_Process.my_pg->connData == NULL, "malloc failed");

    ret = PMI_KVS_Get_my_name(MPIDI_Process.my_pg->connData, kvs_name_sz);
    CHECK_ERR(ret, "PMI_KVS_Get_my_name");

    MPIDI_FUNC_EXIT(MPID_STATE_REINIT_PMI);
    return 0;
}
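The CHECK_ERR macro used above is not part of this excerpt. A minimal sketch of a plausible definition (a hypothetical helper, not the project's actual macro; it relies on PMI_SUCCESS being 0, so a nonzero return code or a true condition counts as failure) might look like:

/* hypothetical CHECK_ERR: log the failing step and bail out with -1 */
#define CHECK_ERR(cond, msg)                                    \
    do {                                                        \
        if (cond) {                                             \
            fprintf(stderr, "reinit_pmi: %s failed\n", (msg));  \
            return -1;                                          \
        }                                                       \
    } while (0)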
static ucs_status_t uct_ugni_fetch_pmi()
{
    int spawned = 0, rc;

    if (job_info.initialized) {
        return UCS_OK;
    }

    /* Fetch information from Cray's PMI */
    rc = PMI_Init(&spawned);
    if (PMI_SUCCESS != rc) {
        ucs_error("PMI_Init failed, Error status: %d", rc);
        return UCS_ERR_IO_ERROR;
    }
    ucs_debug("PMI spawned %d", spawned);

    rc = PMI_Get_size(&job_info.pmi_num_of_ranks);
    if (PMI_SUCCESS != rc) {
        ucs_error("PMI_Get_size failed, Error status: %d", rc);
        return UCS_ERR_IO_ERROR;
    }
    ucs_debug("PMI size %d", job_info.pmi_num_of_ranks);

    rc = PMI_Get_rank(&job_info.pmi_rank_id);
    if (PMI_SUCCESS != rc) {
        ucs_error("PMI_Get_rank failed, Error status: %d", rc);
        return UCS_ERR_IO_ERROR;
    }
    ucs_debug("PMI rank %d", job_info.pmi_rank_id);

    rc = get_ptag(&job_info.ptag);
    if (UCS_OK != rc) {
        ucs_error("get_ptag failed, Error status: %d", rc);
        return rc;
    }
    ucs_debug("PMI ptag %d", job_info.ptag);

    rc = get_cookie(&job_info.cookie);
    if (UCS_OK != rc) {
        ucs_error("get_cookie failed, Error status: %d", rc);
        return rc;
    }
    ucs_debug("PMI cookie %d", job_info.cookie);

    /* Context and domain are activated */
    job_info.initialized = true;
    ucs_debug("UGNI job info was activated");
    return UCS_OK;
}
/**
 * This function is specifically written to make sure that HSAM
 * parameters are configured correctly.
 */
static int check_hsam_parameters()
{
    char *value;
    int size;

    /* Get the number of processes */
    PMI_Get_size(&size);

    /* If the number of processes is less than 64, we can afford to have
     * more RC QPs, so a value of 4 is chosen; for other cases, a value
     * of 2 is chosen. (rdma_num_qp_per_port / stripe_factor) is the
     * number of QPs used for data transfer at a given point.
     * If the user has not specified any value, then perform this tuning. */
    if ((value = getenv("MV2_NUM_QP_PER_PORT")) != NULL) {
        rdma_num_qp_per_port = atoi(value);
        if (rdma_num_qp_per_port <= 2) {
            stripe_factor = 1;
        } else {
            stripe_factor = (rdma_num_qp_per_port / 2);
        }
    } else {
        /* Speculated value: congestion is never actually seen for
         * fewer than 8 nodes */
        if ((size > 8) && (size < 64)) {
            rdma_num_qp_per_port = 4;
            stripe_factor = (rdma_num_qp_per_port / 2);
        } else {
            rdma_num_qp_per_port = 2;
            stripe_factor = 1;
        }
    }
    return MPI_SUCCESS;
}
/* gasneti_bootstrapInit */
int gasneti_bootstrapInit_pmi(
        int *argc_p, char ***argv_p,
        gasnet_node_t *nodes_p, gasnet_node_t *mynode_p)
{
    int size, rank;

#if USE_PMI2_API
    int spawned, appnum;
    if (PMI2_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
        return GASNET_ERR_NOT_INIT;
    }
#else
    if (PMI_SUCCESS != PMI_Initialized(&gasneti_pmi_initialized)) {
        return GASNET_ERR_NOT_INIT;
    }
    if (PMI_FALSE == gasneti_pmi_initialized) {
        int spawned;
        if (PMI_SUCCESS != PMI_Init(&spawned)) {
            return GASNET_ERR_NOT_INIT;
        }
    }
    if (PMI_SUCCESS != PMI_Get_rank(&rank)) {
        gasneti_fatalerror("PMI_Get_rank() failed");
    }
    if (PMI_SUCCESS != PMI_Get_size(&size)) {
        gasneti_fatalerror("PMI_Get_size() failed");
    }
#endif

    *mynode_p = rank;
    *nodes_p = size;

#if USE_PMI2_API
    max_name_len = 1024; /* XXX: can almost certainly be shorter than this! */
    max_key_len = PMI2_MAX_KEYLEN;
    max_val_len = PMI2_MAX_VALLEN;
#else
    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_name_len)) {
        gasneti_fatalerror("PMI_KVS_Get_name_length_max() failed");
    }
    if (PMI_SUCCESS != PMI_KVS_Get_key_length_max(&max_key_len)) {
        gasneti_fatalerror("PMI_KVS_Get_key_length_max() failed");
    }
    if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&max_val_len)) {
        gasneti_fatalerror("PMI_KVS_Get_value_length_max() failed");
    }
#endif

    kvs_name = (char*) gasneti_malloc(max_name_len);
    kvs_key = (char*) gasneti_malloc(max_key_len);
    kvs_value = (char*) gasneti_malloc(max_val_len);
    max_val_bytes = 4 * (max_val_len / 5);

#if USE_PMI2_API
    if (PMI2_SUCCESS != PMI2_Job_GetId(kvs_name, max_name_len)) {
        gasneti_fatalerror("PMI2_Job_GetId() failed");
    }
#else
    if (PMI_SUCCESS != PMI_KVS_Get_my_name(kvs_name, max_name_len)) {
        gasneti_fatalerror("PMI_KVS_Get_my_name() failed");
    }
#endif

    return GASNET_OK;
}
/**
 * Initialize the Process Manager Interface and update global_info.
 * Called by MPID_nem_ib_init.
 *
 * -# Initialize the Process Manager Interface;
 * -# Set the rank;
 * -# Set the process group size;
 *
 * \see MPID_nem_ib_init
 * \todo Need to add more stuff here.
 *       Look at InitPG in mvapich2/trunk/src/mpid/ch3/src/mpid_init.c
 */
int MPID_nem_ib_pmi_init()
{
    int pmi_errno = 0;
    int mpi_errno = MPI_SUCCESS;
    int spawned;
    /* Process group id size */
    int pg_id_sz;
    char *pg_id;
    MPIDI_PG_t *pg = 0;

    assert(global_info != NULL);

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PMI_INIT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PMI_INIT);

    /* Initialize the Process Manager Interface */
    pmi_errno = PMI_Init(&spawned);
    if (pmi_errno != PMI_SUCCESS) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                             "**pmi_init", "**pmi_init %d", pmi_errno);
    }

    /* Set the rank */
    pmi_errno = PMI_Get_rank(&global_info->pg_rank);
    if (pmi_errno != PMI_SUCCESS) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                             "**pmi_get_rank", "**pmi_get_rank %d", pmi_errno);
    }

    /* Set the process group size */
    pmi_errno = PMI_Get_size(&global_info->pg_size);
    if (pmi_errno != 0) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                             "**pmi_get_size", "**pmi_get_size %d", pmi_errno);
    }

    /* --------------------------------------
    From InitPG in mvapich2/trunk/src/mpid/ch3/src/mpid_init.c
       pmi_errno = PMI_Get_appnum(&appnum);
       if (pmi_errno != PMI_SUCCESS) {
           MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_get_appnum",
                                "**pmi_get_appnum %d", pmi_errno);
       }
       / * Note that if pmi is not available, the value of MPI_APPNUM is not set * /
       if (appnum != -1) {
           MPIR_Process.attrs.appnum = appnum;
       }
    */

    /* Now, initialize the process group information with PMI calls */

    /*
     * Get the process group id
     */
    pmi_errno = PMI_KVS_Get_name_length_max(&pg_id_sz);
    if (pmi_errno != PMI_SUCCESS) {
        /*
         * I don't believe that MPICH2 has updated the error message for this
         * yet.
         */
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                             "**pmi_get_id_length_max",
                             "**pmi_get_id_length_max %d", pmi_errno);
    }

    /* This memory will be freed by the PG_Destroy if there is an error */
    pg_id = MPIU_Malloc(pg_id_sz + 1);
    if (pg_id == NULL) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
    }

    /* Note that in the singleton init case, the pg_id is a dummy.
       We'll want to replace this value if we join a process manager */
    pmi_errno = PMI_KVS_Get_my_name(pg_id, pg_id_sz);
    if (pmi_errno != PMI_SUCCESS) {
        /*
         * I don't believe the MPICH2 team has updated the error message for
         * this change yet.
         */
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                             "**pmi_get_id", "**pmi_get_id %d", pmi_errno);
    }

    /*
     * Create a new structure to track the process group for our MPI_COMM_WORLD;
     * use the size PMI just stored in global_info
     */
    mpi_errno = MPIDI_PG_Create(global_info->pg_size, pg_id, &pg);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**dev|pg_create");
    }

    MPIDI_PG_InitConnKVS(pg);

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PMI_INIT);
    return mpi_errno;
fn_fail:
    if (pg) {
        MPIDI_PG_Destroy(pg);
    }
    goto fn_exit;
}
int main(int argc, char **argv, char **envp)
{
    int i, ret, npes;
    int num_interfaces;
    ptl_handle_ni_t nih;
    ptl_handle_eq_t eqh;
    ptl_ni_limits_t ptl_limits;
    pid_t child = 0;
    ptl_process_id_t rnk, *procid_map;
    int spawned;

    if ((ret = PtlInit(&num_interfaces)) != PTL_OK) {
        printf("%s: PtlInit failed: %d\n", __FUNCTION__, ret);
        exit(1);
    }
    printf("%s: PtlInit succeeds (%d)\n", __FUNCTION__, ret);

#ifdef FORK_BEFORE_NI_INIT
    child = fork();
#endif
    if ((ret = PtlNIInit(IFACE_FROM_BRIDGE_AND_NALID(PTL_BRIDGE_UK, PTL_IFACE_SS),
                         PTL_PID_ANY, NULL, &ptl_limits, &nih)) != PTL_OK) {
        printf("%s: PtlNIInit failed: %d\n", __FUNCTION__, ret);
        /*exit(1);*/
    } else
        printf("%s: PtlNIInit succeeds (%d)\n", __FUNCTION__, ret);

#ifdef FORK_AFTER_NI_INIT
    child = fork();
#endif
    if ((ret = PtlEQAlloc(nih, 4096, NULL, &eqh)) != PTL_OK) {
        printf("%s: PtlEQAlloc failed: %d(%d)\n", __FUNCTION__, ret, child);
        exit(1);
    }
    printf("%s: PtlEQAlloc succeeds (%d:%d)\n", __FUNCTION__, child, ret);

#if 1
    if (child) {
        MPI_Init(&argc, &argv);
    }
    if (child) {
        PMI_Init(&spawned);
        printf("\n%d:spawned=%d", child, spawned);
        if ((ret = PMI_Get_size(&npes)) != PMI_SUCCESS) {
            printf("%s: PMI_Get_size failed: %d\n", __FUNCTION__, ret);
            /*exit(1);*/
        } else
            printf("%s: PMI_Get_size succeeds (%d)\n", __FUNCTION__, npes);
        /*procid_map = (ptl_process_id_t *)malloc(sizeof(ptl_process_id_t)*npes);
          if (procid_map == NULL) exit(1);*/
        if ((ret = PMI_CNOS_Get_nidpid_map(&procid_map)) != PMI_SUCCESS) {
            printf("Getting proc map failed (npes=%d)\n", npes);
        }
        for (i = 0; i < npes; i++) {
            printf("\npid=%d nid=%d npes=%d(%d)",
                   procid_map[i].pid, procid_map[i].nid, npes, child);
        }
    }
#endif
    if ((ret = PtlGetId(nih, &rnk)) != PTL_OK) {
        printf("%s: PtlGetId failed: %d(%d)\n", __FUNCTION__, ret, child);
        exit(1);
    }
    printf("%s: nid=%d pid=%d(%d)\n", __FUNCTION__, rnk.nid, rnk.pid, child);
    if (child) {
        MPI_Finalize();
        printf("%s: mpi_init and finalize succeed(%d)\n", __FUNCTION__, child);
    }
    return 0;
}
static bool cray_get_attr(const char *attr, opal_value_t **kv)
{
    int rc, i;
    opal_value_t *kp;

    if (0 == strcmp(PMIX_JOBID, attr)) {
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = pmix_jobid;
        *kv = kp;
        return true;
    }

    if (0 == strcmp(PMIX_RANK, attr)) {
        rc = PMI_Get_rank(&i);
        if (PMI_SUCCESS != rc) {
            OPAL_PMI_ERROR(rc, "PMI_Get_rank");
            return false;
        }
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = i;
        *kv = kp;
        return true;
    }

    if (0 == strcmp(PMIX_UNIV_SIZE, attr)) {
        rc = PMI_Get_universe_size(&i);
        if (PMI_SUCCESS != rc) {
            OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
            return false;
        }
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = i;
        *kv = kp;
        return true;
    }

    if (0 == strcmp(PMIX_JOB_SIZE, attr)) {
        rc = PMI_Get_size(&i);
        if (PMI_SUCCESS != rc) {
            OPAL_PMI_ERROR(rc, "PMI_Get_size");
            return false;
        }
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = i;
        *kv = kp;
        return true;
    }

    if (0 == strcmp(PMIX_APPNUM, attr)) {
        rc = PMI_Get_appnum(&i);
        if (PMI_SUCCESS != rc) {
            OPAL_PMI_ERROR(rc, "PMI_Get_appnum");
            return false;
        }
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = i;
        *kv = kp;
        return true;
    }

    if (0 == strcmp(PMIX_LOCAL_RANK, attr)) {
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = pmix_lrank;
        *kv = kp;
        return true;
    }

    if (0 == strcmp(PMIX_NODE_RANK, attr)) {
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = pmix_nrank;
        *kv = kp;
        return true;
    }

    if (0 == strcmp(PMIX_LOCAL_SIZE, attr)) {
        kp = OBJ_NEW(opal_value_t);
        kp->key = strdup(attr);
        kp->type = OPAL_UINT32;
        kp->data.uint32 = pmix_nlranks;
        *kv = kp;
        return true;
    }

    /* unknown attribute: this function returns bool, so report failure
     * rather than an OPAL error code */
    return false;
}
static int test_item1(void)
{
    int rc = 0;
    int val = 0;

    log_assert(spawned == PMI_FALSE || spawned == PMI_TRUE, "");

    if (PMI_SUCCESS != (rc = PMI_Get_size(&size))) {
        log_fatal("PMI_Get_size failed: %d\n", rc);
        return rc;
    }
    log_assert(size >= 0, "");

    if (PMI_SUCCESS != (rc = PMI_Get_rank(&rank))) {
        log_fatal("PMI_Get_rank failed: %d\n", rc);
        return rc;
    }
    log_assert(rank >= 0, "");
    log_assert(rank < size, "");

    if (PMI_SUCCESS != (rc = PMI_Get_appnum(&appnum))) {
        log_fatal("PMI_Get_appnum failed: %d\n", rc);
        return rc;
    }

    log_info("spawned=%d size=%d rank=%d appnum=%d\n", spawned, size, rank, appnum);

    val = random_value(10, 100);
    if (PMI_SUCCESS != (rc = PMI_Get_universe_size(&val))) {
        log_fatal("PMI_Get_universe_size failed: %d\n", rc);
        return rc;
    }
    log_assert(size == val, "");

    val = random_value(10, 100);
    if (PMI_SUCCESS != (rc = PMI_Get_id_length_max(&val))) {
        log_fatal("PMI_Get_id_length_max failed: %d\n", rc);
        return rc;
    }
    log_info("PMI_Get_id_length_max=%d\n", val);
    if (!_legacy) {
        log_assert(sizeof(jobid) == val, "Check PMIX_MAX_NSLEN value in pmix_common.h");
    }

    sprintf(jobid, "%s", __func__);
    if (PMI_SUCCESS != (rc = PMI_Get_id(jobid, sizeof(jobid)))) {
        log_fatal("PMI_Get_id failed: %d\n", rc);
        return rc;
    }
    log_info("jobid=%s\n", jobid);
    log_assert(memcmp(jobid, __func__, sizeof(__func__)), "");

    sprintf(jobid, "%s", __func__);
    if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(jobid, sizeof(jobid)))) {
        log_fatal("PMI_Get_kvs_domain_id failed: %d\n", rc);
        return rc;
    }
    log_info("PMI_Get_kvs_domain_id=%s\n", jobid);
    log_assert(memcmp(jobid, __func__, sizeof(__func__)), "");

    sprintf(jobid, "%s", __func__);
    if (PMI_SUCCESS != (rc = PMI_KVS_Get_my_name(jobid, sizeof(jobid)))) {
        log_fatal("PMI_KVS_Get_my_name failed: %d\n", rc);
        return rc;
    }
    log_info("PMI_KVS_Get_my_name=%s\n", jobid);
    log_assert(memcmp(jobid, __func__, sizeof(__func__)), "");

    return rc;
}
int main(int argc, char **argv, char **envp)
{
    int pmi_rank = -1;
    int pmi_process_group_size = -1;
    int num_local_procs = 0;
    int *local_rank_ids = NULL;
    int spawned = PMI_FALSE;
    int rc = EXIT_SUCCESS;
    char *err = NULL;
    PMI_BOOL pmi_initialized = PMI_FALSE;
    int pmi_vallen_max, max_length;
    char *pmi_kvs_name = NULL;

    /* sanity: PMI must not report itself initialized before PMI_Init */
    if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized) ||
        PMI_TRUE == pmi_initialized) {
        fprintf(stderr, "=== ERROR: PMI sanity failure\n");
        return EXIT_FAILURE;
    }
    if (PMI_SUCCESS != PMI_Init(&spawned)) {
        err = "PMI_Init failure!";
        goto done;
    }
    /* record that init succeeded so the cleanup path calls PMI_Finalize */
    pmi_initialized = PMI_TRUE;
    if (PMI_SUCCESS != PMI_Get_size(&pmi_process_group_size)) {
        err = "PMI_Get_size failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_rank(&pmi_rank)) {
        err = "PMI_Get_rank failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_clique_size(&num_local_procs)) {
        err = "PMI_Get_clique_size failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&pmi_vallen_max)) {
        err = "PMI_KVS_Get_value_length_max failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_length)) {
        err = "PMI_KVS_Get_name_length_max failure!";
        goto done;
    }
    pmi_kvs_name = (char*)malloc(max_length);
    if (NULL == pmi_kvs_name) {
        err = "malloc failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_KVS_Get_my_name(pmi_kvs_name, max_length)) {
        err = "PMI_KVS_Get_my_name failure!";
        goto done;
    }
    if (NULL == (local_rank_ids = calloc(num_local_procs, sizeof(int)))) {
        err = "out of resources";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_clique_ranks(local_rank_ids, num_local_procs)) {
        err = "PMI_Get_clique_ranks failure!";
        goto done;
    }
    /* lowest local rank will print env info and tag its output */
    //if (pmi_rank == local_rank_ids[0]) {
    //    for (; NULL != envp && NULL != *envp; ++envp) {
    //        printf("===[%d]: %s\n", pmi_rank, *envp);
    //    }
    //}

done:
    if (PMI_TRUE == pmi_initialized) {
        if (PMI_SUCCESS != PMI_Finalize()) {
            err = "PMI_Finalize failure!";
        }
    }
    free(pmi_kvs_name);
    free(local_rank_ids);
    if (NULL != err) {
        fprintf(stderr, "=== ERROR [rank:%d] %s\n", pmi_rank, err);
        rc = EXIT_FAILURE;
    }
    return rc;
}
int PMI_Get_universe_size(int *size)
{
    return PMI_Get_size(size);
}
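Nearly every snippet in this collection repeats the same PMI-1 bootstrap sequence. For reference, here is a minimal self-contained sketch of that sequence, assuming a PMI-1 implementation such as SLURM's libpmi (linked with -lpmi); it is not taken from any of the projects above:

#include <stdio.h>
#include <stdlib.h>
#include <pmi.h>

int main(void)
{
    int spawned, rank, size, kvs_name_len;
    char *kvs_name;

    /* initialize PMI; 'spawned' reports whether we were created via PMI_Spawn */
    if (PMI_SUCCESS != PMI_Init(&spawned)) {
        fprintf(stderr, "PMI_Init failed\n");
        return EXIT_FAILURE;
    }
    /* query our place in the process group */
    if (PMI_SUCCESS != PMI_Get_rank(&rank) ||
        PMI_SUCCESS != PMI_Get_size(&size)) {
        fprintf(stderr, "PMI rank/size query failed\n");
        return EXIT_FAILURE;
    }
    /* the KVS name identifies this job's key-value space */
    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&kvs_name_len)) {
        fprintf(stderr, "PMI_KVS_Get_name_length_max failed\n");
        return EXIT_FAILURE;
    }
    kvs_name = malloc(kvs_name_len);
    if (NULL == kvs_name ||
        PMI_SUCCESS != PMI_KVS_Get_my_name(kvs_name, kvs_name_len)) {
        fprintf(stderr, "PMI_KVS_Get_my_name failed\n");
        return EXIT_FAILURE;
    }
    printf("rank %d of %d in kvs %s (spawned=%d)\n", rank, size, kvs_name, spawned);
    free(kvs_name);
    return PMI_SUCCESS == PMI_Finalize() ? EXIT_SUCCESS : EXIT_FAILURE;
}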
static int s1_init(void)
{
    PMI_BOOL initialized;
    int spawned;
    int rc, ret = OPAL_ERROR;
    int i, rank, lrank, nrank;
    char *pmix_id, tmp[64];
    opal_value_t kv;
    char *str;
    uint32_t ui32;
    opal_process_name_t ldr;
    char **localranks = NULL;

    if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
        OPAL_PMI_ERROR(rc, "PMI_Initialized");
        return OPAL_ERROR;
    }

    if (PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned))) {
        OPAL_PMI_ERROR(rc, "PMI_Init");
        return OPAL_ERROR;
    }

    // setup hash table
    opal_pmix_base_hash_init();

    // Initialize space demands
    rc = PMI_KVS_Get_value_length_max(&pmix_vallen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
        goto err_exit;
    }
    pmix_vallen_threshold = pmix_vallen_max * 3;
    pmix_vallen_threshold >>= 2;

    rc = PMI_KVS_Get_name_length_max(&pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_key_length_max(&pmix_keylen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        goto err_exit;
    }

    // Initialize job environment information
    pmix_id = (char*)malloc(pmix_vallen_max);
    if (pmix_id == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    /* Get domain id */
    if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmix_id, pmix_vallen_max))) {
        free(pmix_id);
        goto err_exit;
    }

    /* get our rank */
    ret = PMI_Get_rank(&rank);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_rank");
        free(pmix_id);
        goto err_exit;
    }

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation. */
    s1_pname.jobid = strtoul(pmix_id, &str, 10);
    s1_pname.jobid = (s1_pname.jobid << 16) & 0xffff0000;
    if (NULL != str && '.' == *str) {
        /* step over the '.' before parsing the stepid;
         * strtoul stops at the dot, so it must be skipped */
        ui32 = strtoul(str + 1, NULL, 10);
        s1_pname.jobid |= (ui32 & 0x0000ffff);
    }
    free(pmix_id);
    ldr.jobid = s1_pname.jobid;
    s1_pname.vpid = rank;
    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    opal_proc_set_name(&s1_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s1: assigned tmp name",
                        OPAL_NAME_PRINT(s1_pname));

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s1_pname.jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save it */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_RANK);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = rank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if (pmix_kvs_name == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    rc = PMI_KVS_Get_my_name(pmix_kvs_name, pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        goto err_exit;
    }

    /* get our local proc info to find our local rank */
    if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&nlranks))) {
        OPAL_PMI_ERROR(rc, "PMI_Get_clique_size");
        return rc;
    }
    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = nlranks;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    lrank = 0;
    nrank = 0;
    ldr.vpid = rank;
    if (0 < nlranks) {
        /* now get the specific ranks */
        lranks = (int*)calloc(nlranks, sizeof(int));
        if (NULL == lranks) {
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            OPAL_ERROR_LOG(rc);
            return rc;
        }
        if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(lranks, nlranks))) {
            OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks");
            free(lranks);
            return rc;
        }
        /* note the local ldr */
        ldr.vpid = lranks[0];
        /* save this */
        memset(tmp, 0, 64);
        for (i = 0; i < nlranks; i++) {
            (void)snprintf(tmp, 64, "%d", lranks[i]);
            opal_argv_append_nosize(&localranks, tmp);
            if (rank == lranks[i]) {
                lrank = i;
                nrank = i;
            }
        }
        str = opal_argv_join(localranks, ',');
        opal_argv_free(localranks);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv.type = OPAL_STRING;
        kv.data.string = str;
        if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
            OPAL_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            goto err_exit;
        }
        OBJ_DESTRUCT(&kv);
    }

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = *(uint64_t*)&ldr;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get universe size */
    ret = PMI_Get_universe_size(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
        goto err_exit;
    }
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get job size */
    ret = PMI_Get_size(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_size");
        goto err_exit;
    }
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get appnum */
    ret = PMI_Get_appnum(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_appnum");
        goto err_exit;
    }
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;

err_exit:
    PMI_Finalize();
    return ret;
}
int main(int argc, char **argv)
{
    int i, j, rc;
    int nprocs, procid;
    int clique_size, *clique_ranks = NULL;
    char *jobid_ptr, *nprocs_ptr, *procid_ptr;
    int pmi_rank, pmi_size, kvs_name_len, key_len, val_len;
    PMI_BOOL initialized;
    char *key, *val, *kvs_name;
    struct timeval tv1, tv2;
    long delta_t;
    char tv_str[20];

    gettimeofday(&tv1, NULL);

    /* Get process count and our id from environment variables */
    jobid_ptr = getenv("SLURM_JOB_ID");
    nprocs_ptr = getenv("SLURM_NPROCS");
    procid_ptr = getenv("SLURM_PROCID");
    if (jobid_ptr == NULL) {
        printf("WARNING: PMI test not run under SLURM\n");
        nprocs = 1;
        procid = 0;
    } else if ((nprocs_ptr == NULL) || (procid_ptr == NULL)) {
        printf("FAILURE: SLURM environment variables not set\n");
        exit(1);
    } else {
        nprocs = atoi(nprocs_ptr);
        procid = atoi(procid_ptr);
    }

    /* Validate process count and our id */
    if ((nprocs < 1) || (nprocs > 9999)) {
        printf("FAILURE: Invalid nprocs %s\n", nprocs_ptr);
        exit(1);
    }
    if ((procid < 0) || (procid > 9999)) {
        printf("FAILURE: Invalid procid %s\n", procid_ptr);
        exit(1);
    }

    /* Get process count and size from PMI and validate */
    if ((rc = PMI_Init(&i)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Init: %d\n", rc);
        exit(1);
    }
    initialized = PMI_FALSE;
    if ((rc = PMI_Initialized(&initialized)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Initialized: %d\n", rc);
        exit(1);
    }
    if (initialized != PMI_TRUE) {
        printf("FAILURE: PMI_Initialized returned false\n");
        exit(1);
    }
    if ((rc = PMI_Get_rank(&pmi_rank)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Get_rank: %d\n", rc);
        exit(1);
    }
#if _DEBUG
    printf("PMI_Get_rank = %d\n", pmi_rank);
#endif
    if ((rc = PMI_Get_size(&pmi_size)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Get_size: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_Get_size = %d\n", pmi_size);
#endif
    if (pmi_rank != procid) {
        printf("FAILURE: Rank(%d) != PROCID(%d)\n", pmi_rank, procid);
        exit(1);
    }
    if (pmi_size != nprocs) {
        printf("FAILURE: Size(%d) != NPROCS(%d), task %d\n",
               pmi_size, nprocs, pmi_rank);
        exit(1);
    }

    if ((rc = PMI_Get_clique_size(&clique_size)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Get_clique_size: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
    clique_ranks = malloc(sizeof(int) * clique_size);
    if ((rc = PMI_Get_clique_ranks(clique_ranks, clique_size)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Get_clique_ranks: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    for (i = 0; i < clique_size; i++)
        printf("PMI_Get_clique_ranks[%d]=%d\n", i, clique_ranks[i]);
#endif
    free(clique_ranks);

    if ((rc = PMI_KVS_Get_name_length_max(&kvs_name_len)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Get_name_length_max: %d, task %d\n",
               rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Get_name_length_max = %d\n", kvs_name_len);
#endif
    kvs_name = malloc(kvs_name_len);
    if ((rc = PMI_KVS_Get_my_name(kvs_name, kvs_name_len)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Get_my_name: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Get_my_name = %s\n", kvs_name);
#endif
    if ((rc = PMI_KVS_Get_key_length_max(&key_len)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Get_key_length_max: %d, task %d\n",
               rc, pmi_rank);
        exit(1);
    }
    key = malloc(key_len);
    if ((rc = PMI_KVS_Get_value_length_max(&val_len)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Get_value_length_max: %d, task %d\n",
               rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Get_value_length_max = %d\n", val_len);
#endif
    val = malloc(val_len);

    /* Build and set some key=val pairs */
    snprintf(key, key_len, "ATTR_1_%d", procid);
    snprintf(val, val_len, "A%d", procid + OFFSET_1);
    if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
               kvs_name, key, val, rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif
    snprintf(key, key_len, "attr_2_%d", procid);
    snprintf(val, val_len, "B%d", procid + OFFSET_2);
    if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
               kvs_name, key, val, rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif

    /* Sync KVS across all tasks */
    if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Commit completed\n");
#endif
    if ((rc = PMI_Barrier()) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_Barrier completed\n");
#endif

    /* Now let's get all keypairs and validate */
    for (i = 0; i < pmi_size; i++) {
        snprintf(key, key_len, "ATTR_1_%d", i);
        if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) {
            printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank);
            exit(1);
        }
        if ((val[0] != 'A') || ((atoi(&val[1]) - OFFSET_1) != i)) {
            printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank);
            exit(1);
        }
#if _DEBUG
        if ((pmi_size <= 8) && (pmi_rank == 0)) /* limit output */
            printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
        snprintf(key, key_len, "attr_2_%d", i);
        if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) {
            printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank);
            exit(1);
        }
        if ((val[0] != 'B') || ((atoi(&val[1]) - OFFSET_2) != i)) {
            printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank);
            exit(1);
        }
#if _DEBUG
        if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */
            printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
    }

    /* use iterator */
    if ((rc = PMI_KVS_Iter_first(kvs_name, key, key_len, val, val_len))
        != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Iter_first: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
    for (i = 0; ; i++) {
        if (key[0] == '\0') {
            if (i != (pmi_size * 2)) {
                printf("FAILURE: PMI_KVS_Iter_next "
                       "cycle count(%d, %d), task %d\n",
                       i, pmi_size, pmi_rank);
            }
            break;
        }
#if _DEBUG
        if ((pmi_size <= 8) && (pmi_rank == 1)) { /* limit output */
            printf("PMI_KVS_Iter_next(%s,%d): %s=%s\n", kvs_name, i, key, val);
        }
#endif
        if ((rc = PMI_KVS_Iter_next(kvs_name, key, key_len, val, val_len))
            != PMI_SUCCESS) {
            printf("FAILURE: PMI_KVS_Iter_next: %d, task %d\n", rc, pmi_rank);
            exit(1);
        }
    }

    /* Build some more key=val pairs */
    snprintf(key, key_len, "ATTR_3_%d", procid);
    snprintf(val, val_len, "C%d", procid + OFFSET_1);
    if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
               kvs_name, key, val, rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif
    snprintf(key, key_len, "attr_4_%d", procid);
    snprintf(val, val_len, "D%d", procid + OFFSET_2);
    if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
               kvs_name, key, val, rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val);
#endif

    /* Sync KVS across all tasks */
    if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Commit completed\n");
#endif
    if ((rc = PMI_Barrier()) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_Barrier completed\n");
#endif

    /* Now let's get some keypairs and validate */
    for (i = 0; i < pmi_size; i++) {
        snprintf(key, key_len, "ATTR_1_%d", i);
        if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) {
            printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank);
            exit(1);
        }
        if ((val[0] != 'A') || ((atoi(&val[1]) - OFFSET_1) != i)) {
            printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank);
            exit(1);
        }
#if _DEBUG
        if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */
            printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
        snprintf(key, key_len, "attr_4_%d", i);
        if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) {
            printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank);
            exit(1);
        }
        if ((val[0] != 'D') || ((atoi(&val[1]) - OFFSET_2) != i)) {
            printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank);
            exit(1);
        }
#if _DEBUG
        if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */
            printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val);
#endif
    }

    /* Replicate the very heavy load that MVAPICH2 puts on PMI.
     * This load exceeds that of MPICH2 by a very wide margin. */
#if _DEBUG
    printf("Starting %d iterations each with %d PMI_KVS_Put and\n"
           "  one each PMI_KVS_Commit and PMI_Barrier\n",
           BARRIER_CNT, PUTS_PER_BARRIER);
    fflush(stdout);
#endif
    for (i = 0; i < BARRIER_CNT; i++) {
        for (j = 0; j < PUTS_PER_BARRIER; j++) {
            snprintf(key, key_len, "ATTR_%d_%d_%d", i, j, procid);
            snprintf(val, val_len, "C%d", procid + OFFSET_1);
            if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
                printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n",
                       kvs_name, key, val, rc, pmi_rank);
                exit(1);
            }
        }
        if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) {
            printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank);
            exit(1);
        }
        if ((rc = PMI_Barrier()) != PMI_SUCCESS) {
            printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank);
            exit(1);
        }
        /* Don't bother with PMI_KVS_Get as those are all local
         * and do not put a real load on srun or the network */
    }
#if _DEBUG
    printf("Iterative PMI calls successful\n");
#endif

    /* create new keyspace and test it */
    if ((rc = PMI_KVS_Create(kvs_name, kvs_name_len)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Create: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Create %s\n", kvs_name);
#endif
    if ((rc = PMI_KVS_Put(kvs_name, "KVS_KEY", "KVS_VAL")) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Put: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Put(%s,KVS_KEY,KVS_VAL)\n", kvs_name);
#endif
    if ((rc = PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n",
               kvs_name, rc, pmi_rank);
        exit(1);
    }
#if _DEBUG
    printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, "KVS_KEY", val);
#endif
    if ((rc = PMI_KVS_Destroy(kvs_name)) != PMI_SUCCESS) {
        printf("FAILURE: PMI_KVS_Destroy(%s): %d, task %d\n",
               kvs_name, rc, pmi_rank);
        exit(1);
    }
    if ((rc = PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len))
        != PMI_ERR_INVALID_KVS) {
        printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n",
               kvs_name, rc, pmi_rank);
        exit(1);
    }

    if ((rc = PMI_Finalize()) != PMI_SUCCESS) {
        printf("FAILURE: PMI_Finalize: %d, task %d\n", rc, pmi_rank);
        exit(1);
    }

    if (_DEBUG || (pmi_rank < 4)) {
        gettimeofday(&tv2, NULL);
        delta_t = (tv2.tv_sec - tv1.tv_sec) * 1000000;
        delta_t += tv2.tv_usec - tv1.tv_usec;
        snprintf(tv_str, sizeof(tv_str), "usec=%ld", delta_t);
        printf("PMI test ran successfully, for task %d, %s\n", pmi_rank, tv_str);
    }
    if (pmi_rank == 0) {
        printf("NOTE: All failures reported, ");
        printf("but only first four successes reported\n");
    }
    exit(0);
}
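The test above exercises the canonical PMI-1 exchange pattern: each task publishes keys, commits, barriers, then reads everyone else's keys. A condensed sketch of one round of that pattern follows; the helper name and fixed buffer sizes are illustrative only, and real code should size buffers from PMI_KVS_Get_key_length_max and PMI_KVS_Get_value_length_max:

/* hypothetical helper: one put/commit/barrier/get round over the job's KVS */
static void exchange_round(const char *kvs_name, int rank, int size)
{
    char key[64], val[64];
    int i, rc;

    /* publish one key=val pair tagged with our rank */
    snprintf(key, sizeof(key), "card_%d", rank);
    snprintf(val, sizeof(val), "data_from_%d", rank);
    if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) {
        fprintf(stderr, "PMI_KVS_Put: %d\n", rc); exit(1);
    }
    /* commit makes our puts visible; the barrier orders them globally */
    if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) {
        fprintf(stderr, "PMI_KVS_Commit: %d\n", rc); exit(1);
    }
    if ((rc = PMI_Barrier()) != PMI_SUCCESS) {
        fprintf(stderr, "PMI_Barrier: %d\n", rc); exit(1);
    }
    /* after the barrier, every task can read every other task's pair */
    for (i = 0; i < size; i++) {
        snprintf(key, sizeof(key), "card_%d", i);
        if ((rc = PMI_KVS_Get(kvs_name, key, val, sizeof(val))) != PMI_SUCCESS) {
            fprintf(stderr, "PMI_KVS_Get(%s): %d\n", key, rc); exit(1);
        }
    }
}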
int pmgr_init(int *argc_p, char ***argv_p, int *np_p, int *me_p, int *id_p)
{
    setvbuf(stdout, NULL, _IONBF, 0);
    char *value;
    struct timeval start, end;
    pmgr_gettimeofday(&start);

    pmgr_echo_debug = 0;

    pmgr_tree_init_null(&pmgr_tree_all);

    /* =======================================================
     * Until told otherwise, assume we are rank 0 of a 1-task MPI job;
     * this enables serial launching, e.g., "./mpiHello" vs "mpirun -np 1 ./mpiHello"
     * TODO: may want to protect this usage via a compile flag and/or env var
     * ======================================================= */

    /* Take a stab at something unique for the id (timestamp.secs | pid)
     * TODO: !!NOTE!!
     * Using a pid in the jobid *ONLY* works for a single-process job.
     * Obviously, multiple tasks will have different pids */
    pmgr_id = 0x7FFFFFFF & ((start.tv_sec << 16) | (0x0000FFFF & getpid()));

    pmgr_me     = 0;
    pmgr_nprocs = 1;

    mpirun_hostname = NULL;
    mpirun_port = 1;

    /* =======================================================
     * Get information from environment, not from the argument list
     * ======================================================= */

    /* if MPIRUN_RANK is set, require RANK, NPROCS, ID, HOST, and PORT to all be set */
    /* this ensures that if one process aborts in a multitask job,
     * then something is there to abort the others, namely the mpirun process */
    if ((value = pmgr_getenv("MPIRUN_RANK", ENV_OPTIONAL)) != NULL) {
        /* MPI rank of current process */
        pmgr_me = atoi(pmgr_getenv("MPIRUN_RANK", ENV_REQUIRED));

        /* number of MPI processes in job */
        pmgr_nprocs = atoi(pmgr_getenv("MPIRUN_NPROCS", ENV_REQUIRED));

        /* unique jobid of current application */
        pmgr_id = atoi(pmgr_getenv("MPIRUN_ID", ENV_REQUIRED));

        /* mpirun host IP string in dotted decimal notation */
        mpirun_hostname = strdup(pmgr_getenv("MPIRUN_HOST", ENV_REQUIRED));

        /* mpirun port number */
        mpirun_port = atoi(pmgr_getenv("MPIRUN_PORT", ENV_REQUIRED));
    }

    if ((value = pmgr_getenv("MPIRUN_OPEN_TIMEOUT", ENV_OPTIONAL))) {
        mpirun_open_timeout = atoi(value);
    }

    if ((value = pmgr_getenv("MPIRUN_CONNECT_TRIES", ENV_OPTIONAL))) {
        mpirun_connect_tries = atoi(value);
    }

    /* seconds */
    if ((value = pmgr_getenv("MPIRUN_CONNECT_TIMEOUT", ENV_OPTIONAL))) {
        mpirun_connect_timeout = atoi(value);
    }

    /* seconds */
    if ((value = pmgr_getenv("MPIRUN_CONNECT_BACKOFF", ENV_OPTIONAL))) {
        mpirun_connect_backoff = atoi(value);
    }

    /* enable/disable randomized option in backoff */
    if ((value = pmgr_getenv("MPIRUN_CONNECT_RANDOM", ENV_OPTIONAL))) {
        mpirun_connect_random = atoi(value);
    }

    /* whether to connect tree from parent to children (down) or children to parent (up) */
    if ((value = pmgr_getenv("MPIRUN_CONNECT_DOWN", ENV_OPTIONAL))) {
        mpirun_connect_down = atoi(value);
    }

    /* MPIRUN_USE_TREES={0,1} disables/enables tree algorithms */
    if ((value = pmgr_getenv("MPIRUN_USE_TREES", ENV_OPTIONAL))) {
        mpirun_use_trees = atoi(value);
    }

    /* use pmi instead of socket connections to mpirun */
    if ((value = pmgr_getenv("MPIRUN_PMI_ENABLE", ENV_OPTIONAL))) {
#ifdef HAVE_PMI
        mpirun_pmi_enable = atoi(value);
#else /* ifdef HAVE_PMI */
        /* PMI was not compiled in, warn user that we're ignoring this value */
        if (pmgr_me == 0) {
            pmgr_error("Not built with PMI support, ignoring MPIRUN_PMI_ENABLE @ %s:%d",
                       __FILE__, __LINE__);
        }
#endif /* ifdef HAVE_PMI */
    }

    /* whether to use /dev/shm to start jobs */
    if ((value = pmgr_getenv("MPIRUN_SHM_ENABLE", ENV_OPTIONAL))) {
        mpirun_shm_enable = atoi(value);
    }

    /* minimum number of tasks to switch to /dev/shm */
    if ((value = pmgr_getenv("MPIRUN_SHM_THRESHOLD", ENV_OPTIONAL))) {
        mpirun_shm_threshold = atoi(value);
    }

    /* whether to authenticate connections */
    if ((value = pmgr_getenv("MPIRUN_AUTHENTICATE_ENABLE", ENV_OPTIONAL))) {
        mpirun_authenticate_enable = atoi(value);
    }

    /* time to wait for a reply when authenticating a new connection (millisecs) */
    if ((value = pmgr_getenv("MPIRUN_AUTHENTICATE_TIMEOUT", ENV_OPTIONAL))) {
        mpirun_authenticate_timeout = atoi(value);
    }

    /* total time to attempt to connect to a host before aborting (seconds) */
    if ((value = pmgr_getenv("MPIRUN_PORT_SCAN_TIMEOUT", ENV_OPTIONAL))) {
        mpirun_port_scan_timeout = atoi(value);
    }

    /* time to wait on connect call before giving up (millisecs) */
    if ((value = pmgr_getenv("MPIRUN_PORT_SCAN_CONNECT_TIMEOUT", ENV_OPTIONAL))) {
        mpirun_port_scan_connect_timeout = atoi(value);
    }

    /* number of times to attempt connect call to given IP:port */
    if ((value = pmgr_getenv("MPIRUN_PORT_SCAN_CONNECT_ATTEMPTS", ENV_OPTIONAL))) {
        mpirun_port_scan_connect_attempts = atoi(value);
    }

    /* time to wait between making consecutive connect attempts to a given IP:port (millisecs) */
    if ((value = pmgr_getenv("MPIRUN_PORT_SCAN_CONNECT_SLEEP", ENV_OPTIONAL))) {
        mpirun_port_scan_connect_sleep = atoi(value);
    }

    /* initialize PMI library if we're using it, and get rank, ranks, and jobid from PMI */
    if (mpirun_pmi_enable) {
#ifdef HAVE_PMI
        /* initialize the PMI library */
        int spawned = 0;
        if (PMI_Init(&spawned) != PMI_SUCCESS) {
            pmgr_error("Failed to initialize PMI library @ file %s:%d",
                       __FILE__, __LINE__);
            PMI_Abort(1, "Failed to initialize PMI library");
        }
        if (spawned) {
            pmgr_error("Spawned processes not supported @ file %s:%d",
                       __FILE__, __LINE__);
            PMI_Abort(1, "Spawned processes not supported");
        }

        /* get my rank */
        if (PMI_Get_rank(&pmgr_me) != PMI_SUCCESS) {
            pmgr_error("Getting rank @ file %s:%d", __FILE__, __LINE__);
            PMI_Abort(1, "Failed to get rank from PMI");
        }

        /* get the number of ranks in this job */
        if (PMI_Get_size(&pmgr_nprocs) != PMI_SUCCESS) {
            pmgr_error("Getting number of ranks in job @ file %s:%d",
                       __FILE__, __LINE__);
            PMI_Abort(1, "Failed to get number of ranks in job");
        }

        /* get jobid */
        if (PMI_Get_appnum(&pmgr_id) != PMI_SUCCESS) {
            pmgr_error("Getting job id @ file %s:%d", __FILE__, __LINE__);
            PMI_Abort(1, "Failed to get job id from PMI");
        }
#endif /* ifdef HAVE_PMI */
    }

    /* =======================================================
     * Check that we have valid values
     * ======================================================= */

    /* MPIRUN_CLIENT_DEBUG={0,1} disables/enables debug statements */
    /* this comes *after* MPIRUN_RANK and MPIRUN_NPROCS since those are
     * used to print debug messages */
    if ((value = pmgr_getenv("MPIRUN_CLIENT_DEBUG", ENV_OPTIONAL)) != NULL) {
        pmgr_echo_debug = atoi(value);
        int print_rank = 0;
        if (pmgr_echo_debug > 0) {
            if (pmgr_echo_debug <= 1 * PMGR_DEBUG_LEVELS) {
                print_rank = (pmgr_me == 0); /* just rank 0 prints */
            } else if (pmgr_echo_debug <= 2 * PMGR_DEBUG_LEVELS) {
                /* just rank 0 and rank N-1 print */
                print_rank = (pmgr_me == 0 || pmgr_me == pmgr_nprocs - 1);
            } else {
                print_rank = 1; /* all ranks print */
            }
            if (print_rank) {
                pmgr_echo_debug = 1 + (pmgr_echo_debug - 1) % PMGR_DEBUG_LEVELS;
            } else {
                pmgr_echo_debug = 0;
            }
        }
    }

    /* check that we have a valid number of processes */
    if (pmgr_nprocs <= 0) {
        pmgr_error("Invalid NPROCS %d @ file %s:%d",
                   pmgr_nprocs, __FILE__, __LINE__);
        exit(1);
    }

    /* check that our rank is valid */
    if (pmgr_me < 0 || pmgr_me >= pmgr_nprocs) {
        pmgr_error("Invalid RANK %d @ file %s:%d",
                   pmgr_me, __FILE__, __LINE__);
        exit(1);
    }

    /* check that we have a valid jobid */
    if (pmgr_id == 0) {
        pmgr_error("Invalid JOBID %d @ file %s:%d",
                   pmgr_id, __FILE__, __LINE__);
        exit(1);
    }

    /* set parameters */
    *np_p = pmgr_nprocs;
    *me_p = pmgr_me;
    *id_p = pmgr_id;

    pmgr_gettimeofday(&end);
    pmgr_debug(2, "Exiting pmgr_init(), took %f seconds for %d procs",
               pmgr_getsecs(&end, &start), pmgr_nprocs);
    return PMGR_SUCCESS;
}
static int s1_init(void)
{
    PMI_BOOL initialized;
    int spawned;
    int rc, ret = OPAL_ERROR;
    int i;
    char *pmix_id, *tmp;
    uint32_t jobfam, stepid;
    opal_value_t kv;

    if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
        OPAL_PMI_ERROR(rc, "PMI_Initialized");
        return OPAL_ERROR;
    }

    if (PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned))) {
        OPAL_PMI_ERROR(rc, "PMI_Init");
        return OPAL_ERROR;
    }

    // Initialize space demands
    rc = PMI_KVS_Get_value_length_max(&pmix_vallen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_name_length_max(&pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_key_length_max(&pmix_keylen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        goto err_exit;
    }

    // Initialize job environment information
    pmix_id = (char*)malloc(pmix_vallen_max);
    if (pmix_id == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    /* Get domain id */
    if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmix_id, pmix_vallen_max))) {
        free(pmix_id);
        goto err_exit;
    }

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation. So we translate
     * this as the overall job number equating to our job family, and
     * the individual number equating to our local jobid */
    jobfam = strtoul(pmix_id, &tmp, 10);
    if (NULL == tmp) {
        /* hmmm - no '.', so let's just use zero */
        stepid = 0;
    } else {
        tmp++; /* step over the '.' */
        stepid = strtoul(tmp, NULL, 10);
    }
    /* now build the jobid */
    s1_jobid = (jobfam << 16) | stepid;
    free(pmix_id);

    /* get our rank */
    ret = PMI_Get_rank(&s1_rank);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_rank");
        goto err_exit;
    }
    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    s1_pname.jid = s1_jobid;
    s1_pname.vid = s1_rank;
    opal_proc_set_name((opal_process_name_t*)&s1_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s1: assigned tmp name",
                        OPAL_NAME_PRINT(*(opal_process_name_t*)&s1_pname));

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if (pmix_kvs_name == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    rc = PMI_KVS_Get_my_name(pmix_kvs_name, pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        goto err_exit;
    }

    /* get our local proc info to find our local rank */
    if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&s1_nlranks))) {
        OPAL_PMI_ERROR(rc, "PMI_Get_clique_size");
        return rc;
    }
    /* now get the specific ranks */
    s1_lranks = (int*)calloc(s1_nlranks, sizeof(int));
    if (NULL == s1_lranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        OPAL_ERROR_LOG(rc);
        return rc;
    }
    if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(s1_lranks, s1_nlranks))) {
        OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks");
        free(s1_lranks);
        return rc;
    }
    /* find ourselves */
    for (i = 0; i < s1_nlranks; i++) {
        if (s1_rank == s1_lranks[i]) {
            s1_lrank = i;
            s1_nrank = i;
            break;
        }
    }

    /* get universe size */
    ret = PMI_Get_universe_size(&s1_usize);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
        goto err_exit;
    }
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_DSTORE_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s1_usize;
    if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal,
                                                 &OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get job size */
    ret = PMI_Get_size(&s1_jsize);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_size");
        goto err_exit;
    }

    /* get appnum */
    ret = PMI_Get_appnum(&s1_appnum);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_appnum");
        goto err_exit;
    }

    return OPAL_SUCCESS;

err_exit:
    PMI_Finalize();
    return ret;
}
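Both s1_init variants pack the Slurm "jobfam.stepid" string into a single 32-bit jobid, with the job family in the upper 16 bits. As a worked example, "1234.5" maps to (1234 << 16) | 5 = 0x04D20005. A small sketch of that translation in isolation (the helper name is hypothetical, not from the projects above):

#include <stdint.h>
#include <stdlib.h>

/* hypothetical helper: pack "jobfam.stepid" into (jobfam << 16) | stepid */
static uint32_t slurm_pmi_jobid(const char *pmi_id)
{
    char *sep;
    uint32_t jobfam = (uint32_t)strtoul(pmi_id, &sep, 10);
    uint32_t stepid = 0;

    /* strtoul stops at the '.', so skip it before parsing the stepid */
    if (NULL != sep && '.' == *sep) {
        stepid = (uint32_t)strtoul(sep + 1, NULL, 10);
    }
    return ((jobfam << 16) & 0xffff0000) | (stepid & 0x0000ffff);
}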
/**
 * Look at rdma_get_control_parameters() in
 * "mvapich2/trunk/src/mpid/ch3/channels/mrail/src/gen2/ibv_param.c"
 */
int MPID_nem_ib_get_control_params()
{
    int mpi_errno = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET_CONTROL_PARAMS);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_GET_CONTROL_PARAMS);

    char* value = NULL;
    int my_rank = -1;

    PMI_Get_rank(&my_rank);
    get_hca_user_parameters();

    /* Start HSAM Parameters */
    if ((value = getenv("MV2_USE_HSAM")) != NULL) {
        process_info.has_hsam = (int)atoi(value);
        if (process_info.has_hsam) {
            check_hsam_parameters();
        }
    } else {
        /* By default disable HSAM, due to a problem with multi-pathing
         * with the current version of opensm and up/down routing */
        process_info.has_hsam = 0;
    }

#ifdef ENABLE_QOS_SUPPORT
    if ((value = getenv("MV2_USE_QOS")) != NULL) {
        rdma_use_qos = !!atoi(value);
    }

    if ((value = getenv("MV2_3DTORUS_SUPPORT")) != NULL) {
        rdma_3dtorus_support = !!atoi(value);
    }

    if ((value = getenv("MV2_PATH_SL_QUERY")) != NULL) {
        rdma_path_sl_query = !!atoi(value);
    }

    if ((value = getenv("MV2_NUM_SLS")) != NULL) {
        rdma_qos_num_sls = atoi(value);
        /* fall back to the default if the value is out of range */
        if (rdma_qos_num_sls <= 0 || rdma_qos_num_sls > RDMA_QOS_MAX_NUM_SLS) {
            rdma_qos_num_sls = RDMA_QOS_DEFAULT_NUM_SLS;
        }
        /* User asked us to use multiple SL's without enabling QoS globally. */
        if (rdma_use_qos == 0) {
            rdma_use_qos = 1;
        }
    }
#endif /* ENABLE_QOS_SUPPORT */

    if ((value = getenv("MV2_NUM_SA_QUERY_RETRIES")) != NULL) {
        rdma_num_sa_query_retries = atoi(value);
        if (rdma_num_sa_query_retries < RDMA_DEFAULT_NUM_SA_QUERY_RETRIES) {
            rdma_num_sa_query_retries = RDMA_DEFAULT_NUM_SA_QUERY_RETRIES;
        }
    }

    process_info.has_apm =
        (value = getenv("MV2_USE_APM")) != NULL ? (int)atoi(value) : 0;
    apm_tester =
        (value = getenv("MV2_USE_APM_TEST")) != NULL ? (int)atoi(value) : 0;
    apm_count =
        (value = getenv("MV2_APM_COUNT")) != NULL ? (int)atoi(value) : APM_COUNT;

    /* Scheduling Parameters */
    if ((value = getenv("MV2_SM_SCHEDULING")) != NULL) {
        if (!strcmp(value, "USE_FIRST")) {
            sm_scheduling = USE_FIRST;
        } else if (!strcmp(value, "ROUND_ROBIN")) {
            sm_scheduling = ROUND_ROBIN;
        } else if (!strcmp(value, "PROCESS_BINDING")) {
            sm_scheduling = PROCESS_BINDING;
        } else {
            MPIU_Usage_printf("Invalid small message scheduling\n");
        }
    }
    /* End: HSAM Parameters */

#if defined(RDMA_CM)
    if ((value = getenv("MV2_USE_IWARP_MODE")) != NULL) {
        process_info.use_rdma_cm = !!atoi(value);
        process_info.use_iwarp_mode = !!atoi(value);
    }

    if (!process_info.use_rdma_cm) {
        if ((value = getenv("MV2_USE_RDMA_CM")) != NULL) {
            process_info.use_rdma_cm = !!atoi(value);
        } else {
            process_info.use_rdma_cm = 0;
            process_info.use_iwarp_mode = 0;
        }
    }

    if ((value = getenv("MV2_SUPPORT_DPM")) && !!atoi(value)) {
        process_info.use_rdma_cm = 0;
        process_info.use_iwarp_mode = 0;
    }

    if (process_info.use_rdma_cm) {
        int rank = ERROR;
        int pg_size = ERROR;
        int threshold = ERROR;

        if (process_info.use_iwarp_mode) {
            /* Trac #423 */
            threshold = MPIDI_CH3I_CM_DEFAULT_IWARP_ON_DEMAND_THRESHOLD;
        } else {
            threshold = MPIDI_CH3I_CM_DEFAULT_ON_DEMAND_THRESHOLD;
        }

        PMI_Get_size(&pg_size);
        PMI_Get_rank(&rank);

        if ((value = getenv("MV2_ON_DEMAND_THRESHOLD")) != NULL) {
            threshold = atoi(value);
        }
        if (pg_size > threshold) {
            process_info.use_rdma_cm_on_demand = 1;
        }
    }
#endif

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_GET_CONTROL_PARAMS);
    return mpi_errno;
}
/**
 * Look at rdma_get_user_parameters() & rdma_get_control_parameters() in
 * "mvapich2/trunk/src/mpid/ch3/channels/mrail/src/gen2/ibv_param.c"
 */
int MPID_nem_ib_get_user_params()
{
    int mpi_errno = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET_USER_PARAMS);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_GET_USER_PARAMS);

    char *value;
    int pg_size;
    PMI_Get_size(&pg_size);

    if ((value = getenv("MV2_DEFAULT_MTU")) != NULL) {
        if (strncmp(value, "IBV_MTU_256", 11) == 0) {
            rdma_default_mtu = IBV_MTU_256;
        } else if (strncmp(value, "IBV_MTU_512", 11) == 0) {
            rdma_default_mtu = IBV_MTU_512;
        } else if (strncmp(value, "IBV_MTU_1024", 12) == 0) {
            rdma_default_mtu = IBV_MTU_1024;
        } else if (strncmp(value, "IBV_MTU_2048", 12) == 0) {
            rdma_default_mtu = IBV_MTU_2048;
        } else if (strncmp(value, "IBV_MTU_4096", 12) == 0) {
            rdma_default_mtu = IBV_MTU_4096;
        } else {
            rdma_default_mtu = IBV_MTU_1024;
        }
    }

    /* Get number of qps/port used by a process */
    if ((value = getenv("MV2_NUM_QP_PER_PORT")) != NULL) {
        rdma_num_qp_per_port = (int)atoi(value);
        if (rdma_num_qp_per_port > MAX_NUM_QP_PER_PORT) {
            rdma_num_qp_per_port = MAX_NUM_QP_PER_PORT;
            MPIU_Usage_printf("Warning, max qps per port is %d, change %s in "
                              "ibv_param.h to override the option\n",
                              MAX_NUM_QP_PER_PORT, "MAX_NUM_QP_PER_PORT");
        }
    }

    if ((value = getenv("MV2_PIN_POOL_SIZE")) != NULL) {
        rdma_pin_pool_size = (int)atoi(value);
    }
    if ((value = getenv("MV2_MAX_INLINE_SIZE")) != NULL) {
        rdma_max_inline_size = (int)atoi(value);
    }
    /* else if (num_proc > 256) { rdma_max_inline_size = 0; } */
    if ((value = getenv("MV2_DEFAULT_MAX_CQ_SIZE")) != NULL) {
        rdma_default_max_cq_size = (int)atoi(value);
    }
    if ((value = getenv("MV2_READ_RESERVE")) != NULL) {
        rdma_read_reserve = (int)atoi(value);
    }
    if ((value = getenv("MV2_NUM_RDMA_BUFFER")) != NULL) {
        num_rdma_buffer = (int)atoi(value);
    }
    if ((value = getenv("MV2_POLLING_SET_THRESHOLD")) != NULL
        && process_info.has_adaptive_fast_path) {
        rdma_polling_set_threshold = atoi(value);
    }
    if ((value = getenv("MV2_RDMA_EAGER_LIMIT")) != NULL
        && process_info.has_adaptive_fast_path) {
        rdma_eager_limit = atoi(value);
        if (rdma_eager_limit < 0)
            rdma_eager_limit = 0;
    }
    if ((value = getenv("MV2_POLLING_SET_LIMIT")) != NULL
        && process_info.has_adaptive_fast_path) {
        rdma_polling_set_limit = atoi(value);
        if (rdma_polling_set_limit == -1) {
            rdma_polling_set_limit = log_2(pg_size);
        }
    } else if (process_info.has_adaptive_fast_path) {
        rdma_polling_set_limit = RDMA_DEFAULT_POLLING_SET_LIMIT;
    }
    if ((value = getenv("MV2_VBUF_TOTAL_SIZE")) != NULL) {
        rdma_vbuf_total_size = user_val_to_bytes(value, "MV2_VBUF_TOTAL_SIZE");
        if (rdma_vbuf_total_size <= 2 * sizeof(int))
            rdma_vbuf_total_size = 2 * sizeof(int);
    }
    if ((value = getenv("MV2_RDMA_FAST_PATH_BUF_SIZE")) != NULL
        && process_info.has_adaptive_fast_path) {
        rdma_fp_buffer_size = atoi(value);
    }

    /* We have read the value of the rendezvous threshold and the number of
     * rails used for communication; increase the striping threshold
     * accordingly. Messages in between will use the rendezvous protocol,
     * however they will not be striped.
     * Call get_hca_parameters() before the next line to set ib_hca_num_ports
     * and ib_hca_num_hcas to proper values */
    striping_threshold = rdma_vbuf_total_size * ib_hca_num_ports *
                         rdma_num_qp_per_port * ib_hca_num_hcas;

    if ((value = getenv("MV2_SRQ_MAX_SIZE")) != NULL) {
        viadev_srq_alloc_size = (uint32_t)atoi(value);
    }
    if ((value = getenv("MV2_SRQ_SIZE")) != NULL) {
        viadev_srq_fill_size = (uint32_t)atoi(value);
    }
    if ((value = getenv("MV2_SRQ_LIMIT")) != NULL) {
        viadev_srq_limit = (uint32_t)atoi(value);
        if (viadev_srq_limit > viadev_srq_fill_size) {
            MPIU_Usage_printf("SRQ limit shouldn't be greater than SRQ size\n");
        }
    }

    if (process_info.has_srq) {
        rdma_credit_preserve = (viadev_srq_fill_size > 200)
                                   ? (viadev_srq_fill_size - 100)
                                   : (viadev_srq_fill_size / 2);
    }

    if ((value = getenv("MV2_IBA_EAGER_THRESHOLD")) != NULL) {
        rdma_iba_eager_threshold = user_val_to_bytes(value, "MV2_IBA_EAGER_THRESHOLD");
    }

    if ((value = getenv("MV2_STRIPING_THRESHOLD")) != NULL) {
        striping_threshold = atoi(value);
        if (striping_threshold <= 0) {
            /* Invalid value - set to computed value */
            striping_threshold = rdma_vbuf_total_size * ib_hca_num_ports *
                                 rdma_num_qp_per_port * ib_hca_num_hcas;
        }
        if (striping_threshold < rdma_iba_eager_threshold) {
            /* checking to make sure that the striping threshold is not less
             * than the RNDV threshold since it won't work as expected */
            striping_threshold = rdma_iba_eager_threshold;
        }
    }

    if ((value = getenv("MV2_INTEGER_POOL_SIZE")) != NULL) {
        rdma_integer_pool_size = (int)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_PUT_GET_LIST_SIZE")) != NULL) {
        rdma_default_put_get_list_size = (int)atoi(value);
    }
    if ((value = getenv("MV2_EAGERSIZE_1SC")) != NULL) {
        rdma_eagersize_1sc = (int)atoi(value);
    }
    if ((value = getenv("MV2_PUT_FALLBACK_THRESHOLD")) != NULL) {
        rdma_put_fallback_threshold = (int)atoi(value);
    }
    if ((value = getenv("MV2_GET_FALLBACK_THRESHOLD")) != NULL) {
        rdma_get_fallback_threshold = (int)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_PORT")) != NULL) {
        rdma_default_port = (int)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_QP_OUS_RD_ATOM")) != NULL) {
        rdma_default_qp_ous_rd_atom = (uint8_t)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_MAX_RDMA_DST_OPS")) != NULL) {
        rdma_default_max_rdma_dst_ops = (uint8_t)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_PSN")) != NULL) {
        rdma_default_psn = (uint32_t)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_PKEY")) != NULL) {
        rdma_default_pkey = (uint16_t)strtol(value, (char **)NULL, 0) & PKEY_MASK;
    }
    if ((value = getenv("MV2_DEFAULT_MIN_RNR_TIMER")) != NULL) {
        rdma_default_min_rnr_timer = (uint8_t)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_SERVICE_LEVEL")) != NULL) {
        rdma_default_service_level = (uint8_t)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_TIME_OUT")) != NULL) {
        rdma_default_time_out = (uint8_t)atol(value);
    }
    if ((value = getenv("MV2_DEFAULT_STATIC_RATE")) != NULL) {
        rdma_default_static_rate = (uint8_t)atol(value);
    }
    if ((value = getenv("MV2_DEFAULT_SRC_PATH_BITS")) != NULL) {
        rdma_default_src_path_bits = (uint8_t)atoi(value);
    }
    if ((value = getenv("MV2_DEFAULT_RETRY_COUNT")) != NULL) {
        rdma_default_retry_count = (uint8_t)atol(value);
    }
    if ((value = getenv("MV2_DEFAULT_RNR_RETRY")) != NULL) {
        rdma_default_rnr_retry = (uint8_t)atol(value);
    }
    if ((value = getenv("MV2_DEFAULT_MAX_SG_LIST")) != NULL) {
        rdma_default_max_sg_list = (uint32_t)atol(value);
    }
    if ((value = getenv("MV2_DEFAULT_MAX_SEND_WQE")) != NULL) {
        rdma_default_max_send_wqe = atol(value);
    }
    /* else if (num_proc > 256) { rdma_default_max_send_wqe = 16; } */
    if ((value = getenv("MV2_DEFAULT_MAX_RECV_WQE")) != NULL) {
        rdma_default_max_recv_wqe = atol(value);
    }
    if ((value = getenv("MV2_NDREG_ENTRIES")) != NULL) {
        rdma_ndreg_entries = (unsigned int)atoi(value);
    }
    if ((value = getenv("MV2_VBUF_MAX")) != NULL) {
        rdma_vbuf_max = atoi(value);
    }
    if ((value = getenv("MV2_INITIAL_PREPOST_DEPTH")) != NULL) {
        rdma_initial_prepost_depth = atoi(value);
    }
    if ((value = getenv("MV2_PREPOST_DEPTH")) != NULL) {
        rdma_prepost_depth = atoi(value);
    }
    if ((value = getenv("MV2_MAX_REGISTERED_PAGES")) != NULL) {
        rdma_max_registered_pages = atol(value);
    }
    if ((value = getenv("MV2_VBUF_POOL_SIZE")) != NULL) {
        rdma_vbuf_pool_size = atoi(value);
    }
    if ((value = getenv("MV2_DREG_CACHE_LIMIT")) != NULL) {
        rdma_dreg_cache_limit = atol(value);
    }
    if (rdma_vbuf_pool_size <= 10) {
        rdma_vbuf_pool_size = 10;
        MPIU_Usage_printf("Warning! Too small vbuf pool size (%d). "
                          "Reset to %d\n", rdma_vbuf_pool_size, 10);
    }
    if ((value = getenv("MV2_VBUF_SECONDARY_POOL_SIZE")) != NULL) {
        rdma_vbuf_secondary_pool_size = atoi(value);
    }
    if (rdma_vbuf_secondary_pool_size <= 0) {
        rdma_vbuf_secondary_pool_size = 1;
        MPIU_Usage_printf("Warning! Too small secondary vbuf pool size (%d). "
                          "Reset to %d\n", rdma_vbuf_secondary_pool_size, 1);
    }
    if (rdma_initial_prepost_depth <= rdma_prepost_noop_extra) {
        rdma_initial_credits = rdma_initial_prepost_depth;
    } else {
        rdma_initial_credits = rdma_initial_prepost_depth - rdma_prepost_noop_extra;
    }

    rdma_rq_size = rdma_prepost_depth +
                   rdma_prepost_rendezvous_extra + rdma_prepost_noop_extra;

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_GET_USER_PARAMS);
    return mpi_errno;
}
int MPID_nem_ib_get_control_params_after_hcainit()
{
    MPIDI_STATE_DECL(MPID_STATE_RDMA_GET_CONTROL_PARAMS_AFTER);
    MPIDI_FUNC_ENTER(MPID_STATE_RDMA_GET_CONTROL_PARAMS_AFTER);

    char *value = NULL;
    int mpi_errno = MPI_SUCCESS;
    int my_rank = -1;
    PMI_Get_rank(&my_rank);
    int size;
    PMI_Get_size(&size);

    process_info.has_srq = (value = getenv("MV2_USE_SRQ")) != NULL ? !!atoi(value) : 1;

#ifdef _ENABLE_XRC_
    if (USE_XRC) {
        process_info.has_srq = 1;
        MPIU_Assert(MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_ON_DEMAND);
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;
        rdma_use_coalesce = 0;
        rdma_use_blocking = 0;
    }
#endif /* _ENABLE_XRC_ */

    /* VBUF */
    if (process_info.has_srq
        && (hca_list[0].hca_type != PATH_HT)
        && (hca_list[0].hca_type != MLX_PCI_X)
        && (hca_list[0].hca_type != IBM_EHCA)
#if defined(RDMA_CM)
        && !process_info.use_iwarp_mode
#endif /* defined(RDMA_CM) */
        ) {
        process_info.post_send = MPIDI_nem_ib_post_srq_send;
    } else {
        process_info.has_srq = 0;
        process_info.post_send = MPIDI_nem_ib_post_send;
    }

#if defined(CKPT)
    process_info.has_adaptive_fast_path = 0;
    rdma_polling_set_limit = 0;
#else /* defined(CKPT) */
    if ((value = getenv("MV2_USE_RDMA_FAST_PATH")) != NULL) {
        process_info.has_adaptive_fast_path = !!atoi(value);
        if (!process_info.has_adaptive_fast_path) {
            rdma_polling_set_limit = 0;
        }
    } else {
        process_info.has_adaptive_fast_path = 1;
    }
#endif /* defined(CKPT) */

    process_info.has_ring_startup = (value = getenv("MV2_USE_RING_STARTUP")) != NULL ? !!atoi(value) : 1;

#if !defined(DISABLE_PTMALLOC)
    process_info.has_lazy_mem_unregister = (value = getenv("MV2_USE_LAZY_MEM_UNREGISTER")) != NULL ? !!atoi(value) : 1;
#endif /* !defined(DISABLE_PTMALLOC) */

#if defined(CKPT)
    process_info.has_one_sided = 0;
#else /* defined(CKPT) */
    process_info.has_one_sided = (value = getenv("MV2_USE_RDMA_ONE_SIDED")) != NULL ? !!atoi(value) : 1;
#endif /* defined(CKPT) */

    if ((value = getenv("MV2_RNDV_EXT_SENDQ_SIZE")) != NULL) {
        rdma_rndv_ext_sendq_size = atoi(value);
        if (rdma_rndv_ext_sendq_size <= 1) {
            MPIU_Usage_printf("Setting MV2_RNDV_EXT_SENDQ_SIZE to 1 or less "
                              "will severely limit the MPI bandwidth.\n");
        }
    }
    if ((value = getenv("MV2_COALESCE_THRESHOLD")) != NULL) {
        rdma_coalesce_threshold = atoi(value);
        if (rdma_coalesce_threshold < 1) {
            MPIU_Usage_printf("MV2_COALESCE_THRESHOLD must be >= 1\n");
            rdma_coalesce_threshold = 1;
        }
    }
    if ((value = getenv("MV2_USE_COALESCE")) != NULL) {
        rdma_use_coalesce = !!atoi(value);
    }
    if (hca_list[0].hca_type == MLX_CX_DDR ||
        hca_list[0].hca_type == MLX_CX_SDR ||
        hca_list[0].hca_type == MLX_CX_QDR) {
        rdma_use_coalesce = 0;
    }
    if ((value = getenv("MV2_USE_SHARED_MEM")) != NULL) {
        rdma_use_smp = !!atoi(value);
    }
    if ((value = getenv("MV2_USE_IBOETH")) != NULL) {
        use_iboeth = !!atoi(value);
        if (1 == process_info.has_ring_startup) {
            if (0 == my_rank) {
                MPIU_Usage_printf("Ring startup cannot be used in IBoEth mode. "
                                  "Falling back to PMI exchange.\r\n"
                                  "You can also set MV2_USE_RING_STARTUP=0.\r\n");
            }
            process_info.has_ring_startup = 0;
        }
        if (!rdma_use_smp) {
            if (0 == my_rank) {
                MPIU_Usage_printf("IBoEth mode cannot function without SHMEM. "
                                  "Falling back to use SHMEM.\r\n"
                                  "Please do NOT set MV2_USE_SHARED_MEM=0.\r\n");
            }
            rdma_use_smp = 1;
        }
    }

#ifdef _ENABLE_XRC_
    if (!USE_XRC) {
#endif
    if ((value = getenv("MV2_USE_BLOCKING")) != NULL) {
        rdma_use_blocking = !!atoi(value);
        /* Blocking mode disables shared memory and the RDMA fast path */
        if (rdma_use_blocking) {
            rdma_use_smp = 0;
            process_info.has_adaptive_fast_path = 0;
        }
    }
#ifdef _ENABLE_XRC_
    }
#endif
    if ((value = getenv("MV2_SPIN_COUNT")) != NULL) {
        rdma_blocking_spin_count_threshold = atol(value);
    }
    if ((value = getenv("MV2_RNDV_PROTOCOL")) != NULL) {
        if (strncmp(value, "R3", 2) == 0) {
            rdma_rndv_protocol = MV2_LMT_PROTOCOL_R3;
        }
    }
#if FALSE
    /* Disabled: legacy VAPI_PROTOCOL_* selection, kept for reference only */
    if ((value = getenv("MV2_RNDV_PROTOCOL")) != NULL) {
        if (strncmp(value, "RPUT", 4) == 0) {
            rdma_rndv_protocol = VAPI_PROTOCOL_RPUT;
        } else if (strncmp(value, "RGET", 4) == 0
#ifdef _ENABLE_XRC_
                   && !USE_XRC
#endif
                   ) {
#if defined(CKPT)
            MPIU_Usage_printf("MV2_RNDV_PROTOCOL must be either \"RPUT\" or "
                              "\"R3\" when checkpoint is enabled\n");
            rdma_rndv_protocol = VAPI_PROTOCOL_RPUT;
#else /* defined(CKPT) */
            rdma_rndv_protocol = VAPI_PROTOCOL_RGET;
#endif /* defined(CKPT) */
        } else if (strncmp(value, "R3", 2) == 0) {
            rdma_rndv_protocol = VAPI_PROTOCOL_R3;
        } else {
#ifdef _ENABLE_XRC_
            if (!USE_XRC)
#endif
                MPIU_Usage_printf("MV2_RNDV_PROTOCOL must be either \"RPUT\", "
                                  "\"RGET\", or \"R3\"\n");
            rdma_rndv_protocol = VAPI_PROTOCOL_RPUT;
        }
    }
#endif
    if ((value = getenv("MV2_R3_THRESHOLD")) != NULL) {
        rdma_r3_threshold = atoi(value);
        if (rdma_r3_threshold < 0) {
            rdma_r3_threshold = 0;
        }
    }
    if ((value = getenv("MV2_R3_NOCACHE_THRESHOLD")) != NULL) {
        rdma_r3_threshold_nocache = atoi(value);
        if (rdma_r3_threshold_nocache < 0) {
            rdma_r3_threshold_nocache = 0;
        }
    }
    if ((value = getenv("MV2_MAX_R3_PENDING_DATA")) != NULL) {
        rdma_max_r3_pending_data = atoi(value);
        if (rdma_max_r3_pending_data < 0) {
            rdma_max_r3_pending_data = 0;
        }
    }
#if defined(RDMA_CM)
    if (process_info.use_rdma_cm_on_demand) {
        process_info.use_iwarp_mode = 1;
    }
#endif /* defined(RDMA_CM) */

    /* fn_exit: */
    MPIDI_FUNC_EXIT(MPID_STATE_RDMA_GET_CONTROL_PARAMS_AFTER);
    return mpi_errno;
    /* fn_fail: goto fn_exit; */
}
int main(int argc, char *argv[])
{
    int initialized, rank, size;
    int i, max_name_len, max_key_len, max_val_len;
    char *name, *key, *val;

    if (PMI_SUCCESS != PMI_Initialized(&initialized)) {
        return 1;
    }
    if (0 == initialized) {
        if (PMI_SUCCESS != PMI_Init(&initialized)) {
            return 1;
        }
    }
    if (PMI_SUCCESS != PMI_Get_rank(&rank)) {
        return 1;
    }
    if (PMI_SUCCESS != PMI_Get_size(&size)) {
        return 1;
    }
    printf("Hello, World. I am %d of %d\n", rank, size);

    if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_name_len)) {
        return 1;
    }
    name = (char*) malloc(max_name_len);
    if (NULL == name) return 1;
    if (PMI_SUCCESS != PMI_KVS_Get_key_length_max(&max_key_len)) {
        return 1;
    }
    key = (char*) malloc(max_key_len);
    if (NULL == key) return 1;
    if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&max_val_len)) {
        return 1;
    }
    val = (char*) malloc(max_val_len);
    if (NULL == val) return 1;
    if (PMI_SUCCESS != PMI_KVS_Get_my_name(name, max_name_len)) {
        return 1;
    }

    /* put my information */
    snprintf(key, max_key_len, "pmi_hello-%lu-test", (long unsigned) rank);
    snprintf(val, max_val_len, "%lu", (long unsigned) rank);
    if (PMI_SUCCESS != PMI_KVS_Put(name, key, val)) {
        return 1;
    }
    if (PMI_SUCCESS != PMI_KVS_Commit(name)) {
        return 1;
    }
    if (PMI_SUCCESS != PMI_Barrier()) {
        return 1;
    }

    /* verify everyone's information */
    for (i = 0; i < size; ++i) {
        snprintf(key, max_key_len, "pmi_hello-%lu-test", (long unsigned) i);
        if (PMI_SUCCESS != PMI_KVS_Get(name, key, val, max_val_len)) {
            return 1;
        }
        if (i != strtol(val, NULL, 0)) {
            fprintf(stderr, "%d: Error: Expected %d, got %d\n",
                    rank, i, (int) strtol(val, NULL, 0));
            return 1;
        }
    }

    PMI_Finalize();
    return 0;
}
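/*
 * The test above wraps every PMI call in the same
 * "if (PMI_SUCCESS != ...) { return 1; }" guard. A checking macro keeps the
 * control flow identical while making each call site one line; PMI_CHECK is
 * our convenience macro, not part of pmi.h.
 */
#include <stdio.h>
#include <pmi.h>

#define PMI_CHECK(call)                                                   \
    do {                                                                  \
        int pmi_check_rc_ = (call);                                       \
        if (PMI_SUCCESS != pmi_check_rc_) {                               \
            fprintf(stderr, "%s failed: %d\n", #call, pmi_check_rc_);     \
            return 1;                                                     \
        }                                                                 \
    } while (0)

/* With it, the setup section of main() above would shrink to:
 *     PMI_CHECK(PMI_Get_rank(&rank));
 *     PMI_CHECK(PMI_Get_size(&size));
 *     PMI_CHECK(PMI_KVS_Get_name_length_max(&max_name_len));
 */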
int main(void)
{
    int rc;
    int rank, size;
    PMI_BOOL initialized;

    rc = PMI_Initialized(&initialized);
    if (rc != PMI_SUCCESS) PMI_Abort(rc, "PMI_Initialized failed");

    if (initialized != PMI_TRUE) {
        int spawned;
        rc = PMI_Init(&spawned);
        if (rc != PMI_SUCCESS) PMI_Abort(rc, "PMI_Init failed");
    }

    rc = PMI_Get_rank(&rank);
    if (rc != PMI_SUCCESS) PMI_Abort(rc, "PMI_Get_rank failed");

    rc = PMI_Get_size(&size);
    if (rc != PMI_SUCCESS) PMI_Abort(rc, "PMI_Get_size failed");

    printf("rank %d of %d\n", rank, size);

    int rpn; /* rpn = ranks per node */
    rc = PMI_Get_clique_size(&rpn);
    if (rc != PMI_SUCCESS) PMI_Abort(rc, "PMI_Get_clique_size failed");
    printf("rank %d clique size %d\n", rank, rpn);

    int *clique_ranks = malloc(rpn * sizeof(int));
    if (clique_ranks == NULL) PMI_Abort(rpn, "malloc failed");

    rc = PMI_Get_clique_ranks(clique_ranks, rpn);
    if (rc != PMI_SUCCESS) PMI_Abort(rc, "PMI_Get_clique_ranks failed");
    for (int i = 0; i < rpn; i++)
        printf("rank %d clique[%d] = %d\n", rank, i, clique_ranks[i]);

    int nid;
    rc = PMI_Get_nid(rank, &nid);
    if (rc != PMI_SUCCESS) PMI_Abort(rc, "PMI_Get_nid failed");
    printf("rank %d PMI_Get_nid gives nid %d\n", rank, nid);

#if OLD
    rca_mesh_coord_t xyz;
    rca_get_meshcoord((uint16_t) nid, &xyz);
    printf("rank %d rca_get_meshcoord returns (%2u,%2u,%2u)\n",
           rank, xyz.mesh_x, xyz.mesh_y, xyz.mesh_z);
#else // UNTESTED
    pmi_mesh_coord_t xyz;
    PMI_Get_meshcoord((uint16_t) nid, &xyz);
    printf("rank %d PMI_Get_meshcoord returns (%2u,%2u,%2u)\n",
           rank, xyz.mesh_x, xyz.mesh_y, xyz.mesh_z);
#endif
    fflush(stdout);
    return 0;
}
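/*
 * A common use of the clique calls exercised above is deriving a node-local
 * rank. The sketch below treats a rank's index in the list returned by
 * PMI_Get_clique_ranks() as its local rank; the helper name and that
 * ordering assumption are ours, so verify the ordering on your PMI.
 */
#include <stdlib.h>
#include <pmi.h>

static int local_rank_from_clique(int my_rank)
{
    int rpn, i, local = -1;
    int *clique;

    if (PMI_SUCCESS != PMI_Get_clique_size(&rpn)) return -1;
    clique = malloc(rpn * sizeof(int));
    if (clique == NULL) return -1;
    if (PMI_SUCCESS == PMI_Get_clique_ranks(clique, rpn)) {
        for (i = 0; i < rpn; i++) {
            if (clique[i] == my_rank) {
                local = i; /* my position among the ranks on this node */
                break;
            }
        }
    }
    free(clique);
    return local;
}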
int MPID_Init(int *argc, char ***argv,
              int threadlevel_requested, int *threadlevel_provided,
              int *has_args, int *has_env)
{
    int mpi_errno = MPI_SUCCESS;
    int pg_rank, pg_size, pg_id_sz;
    int appnum = -1;
    /* int universe_size; */
    int has_parent;
    pscom_socket_t *socket;
    pscom_err_t rc;
    char *pg_id_name;
    char *parent_port;

    /* Call any and all MPID_Init type functions */
    MPIR_Err_init();
    MPIR_Datatype_init();
    MPIR_Group_init();

    mpid_debug_init();

    assert(PSCOM_ANYPORT == -1); /* all code places that depend on this are
                                    marked with "assert(PSP_ANYPORT == -1);" */

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_INIT);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_INIT);

    PMICALL(PMI_Init(&has_parent));
    PMICALL(PMI_Get_rank(&pg_rank));
    PMICALL(PMI_Get_size(&pg_size));
    PMICALL(PMI_Get_appnum(&appnum));

    *has_args = 1;
    *has_env = 1;

    /* without PMI_Get_universe_size() we see the pmi error
       '[unset]: write_line error; fd=-1' in PMI_KVS_Get()! */
    /* PMICALL(PMI_Get_universe_size(&universe_size)); */

    if (pg_rank < 0) pg_rank = 0;
    if (pg_size <= 0) pg_size = 1;

    if (
#ifndef MPICH_IS_THREADED
        1
#else
        threadlevel_requested < MPI_THREAD_MULTIPLE
#endif
    ) {
        rc = pscom_init(PSCOM_VERSION);
        if (rc != PSCOM_SUCCESS) {
            fprintf(stderr, "pscom_init(0x%04x) failed : %s\n",
                    PSCOM_VERSION, pscom_err_str(rc));
            exit(1);
        }
    } else {
        rc = pscom_init_thread(PSCOM_VERSION);
        if (rc != PSCOM_SUCCESS) {
            fprintf(stderr, "pscom_init_thread(0x%04x) failed : %s\n",
                    PSCOM_VERSION, pscom_err_str(rc));
            exit(1);
        }
    }

    /* Initialize the switches */
    pscom_env_get_uint(&MPIDI_Process.env.enable_collectives, "PSP_COLLECTIVES");
#ifdef PSCOM_HAS_ON_DEMAND_CONNECTIONS
    /* if (pg_size > 32) MPIDI_Process.env.enable_ondemand = 1; */
    pscom_env_get_uint(&MPIDI_Process.env.enable_ondemand, "PSP_ONDEMAND");
#else
    MPIDI_Process.env.enable_ondemand = 0;
#endif
    /* enable_ondemand_spawn defaults to enable_ondemand */
    MPIDI_Process.env.enable_ondemand_spawn = MPIDI_Process.env.enable_ondemand;
    pscom_env_get_uint(&MPIDI_Process.env.enable_ondemand_spawn, "PSP_ONDEMAND_SPAWN");

    /* take SMP-related locality information into account
       (e.g., for MPI_Win_allocate_shared) */
    pscom_env_get_uint(&MPIDI_Process.env.enable_smp_awareness, "PSP_SMP_AWARENESS");

    /* take MSA-related topology information into account */
    pscom_env_get_uint(&MPIDI_Process.env.enable_msa_awareness, "PSP_MSA_AWARENESS");
    if (MPIDI_Process.env.enable_msa_awareness) {
        pscom_env_get_uint(&MPIDI_Process.msa_module_id, "PSP_MSA_MODULE_ID");
    }

#ifdef MPID_PSP_TOPOLOGY_AWARE_COLLOPS
    /* use hierarchy-aware collectives on SMP level */
    pscom_env_get_uint(&MPIDI_Process.env.enable_smp_aware_collops, "PSP_SMP_AWARE_COLLOPS");

    /* use hierarchy-aware collectives on MSA level
       (disables SMP-aware collops / FIX ME!) */
    pscom_env_get_uint(&MPIDI_Process.env.enable_msa_aware_collops, "PSP_MSA_AWARE_COLLOPS");
    if (MPIDI_Process.env.enable_msa_aware_collops)
        MPIDI_Process.env.enable_smp_aware_collops = 0;
#endif

#ifdef MPID_PSP_CREATE_HISTOGRAM
    /* collect statistics information and print them at the end of a run */
    pscom_env_get_uint(&MPIDI_Process.env.enable_histogram, "PSP_HISTOGRAM");
    pscom_env_get_uint(&MPIDI_Process.histo.max_size, "PSP_HISTOGRAM_MAX");
    pscom_env_get_uint(&MPIDI_Process.histo.min_size, "PSP_HISTOGRAM_MIN");
    pscom_env_get_uint(&MPIDI_Process.histo.step_width, "PSP_HISTOGRAM_SHIFT");
#endif
    /*
    pscom_env_get_uint(&mpir_allgather_short_msg, "PSP_ALLGATHER_SHORT_MSG");
    pscom_env_get_uint(&mpir_allgather_long_msg, "PSP_ALLGATHER_LONG_MSG");
    pscom_env_get_uint(&mpir_allreduce_short_msg, "PSP_ALLREDUCE_SHORT_MSG");
    pscom_env_get_uint(&mpir_alltoall_short_msg, "PSP_ALLTOALL_SHORT_MSG");
    pscom_env_get_uint(&mpir_alltoall_medium_msg, "PSP_ALLTOALL_MEDIUM_MSG");
    pscom_env_get_uint(&mpir_alltoall_throttle, "PSP_ALLTOALL_THROTTLE");
    pscom_env_get_uint(&mpir_bcast_short_msg, "PSP_BCAST_SHORT_MSG");
    pscom_env_get_uint(&mpir_bcast_long_msg, "PSP_BCAST_LONG_MSG");
    pscom_env_get_uint(&mpir_bcast_min_procs, "PSP_BCAST_MIN_PROCS");
    pscom_env_get_uint(&mpir_gather_short_msg, "PSP_GATHER_SHORT_MSG");
    pscom_env_get_uint(&mpir_gather_vsmall_msg, "PSP_GATHER_VSMALL_MSG");
    pscom_env_get_uint(&mpir_redscat_commutative_long_msg, "PSP_REDSCAT_COMMUTATIVE_LONG_MSG");
    pscom_env_get_uint(&mpir_redscat_noncommutative_short_msg, "PSP_REDSCAT_NONCOMMUTATIVE_SHORT_MSG");
    pscom_env_get_uint(&mpir_reduce_short_msg, "PSP_REDUCE_SHORT_MSG");
    pscom_env_get_uint(&mpir_scatter_short_msg, "PSP_SCATTER_SHORT_MSG");
    */

    socket = pscom_open_socket(0, 0);

    if (!MPIDI_Process.env.enable_ondemand) {
        socket->ops.con_accept = mpid_con_accept;
    }

    {
        char name[10];
        snprintf(name, sizeof(name), "r%07u", (unsigned) pg_rank);
        pscom_socket_set_name(socket, name);
    }

    rc = pscom_listen(socket, PSCOM_ANYPORT);
    if (rc != PSCOM_SUCCESS) {
        PRINTERROR("pscom_listen(PSCOM_ANYPORT)");
        goto fn_fail;
    }

    /* Note that if PMI is not available, the value of MPI_APPNUM is not set */
    /* if (appnum != -1) { */
    MPIR_Process.attrs.appnum = appnum;
    /* } */
#if 0
    // see mpiimpl.h:
    // typedef struct PreDefined_attrs {
    //     int appnum;          /* Application number provided by mpiexec (MPI-2) */
    //     int host;            /* host */
    //     int io;              /* standard io allowed */
    //     int lastusedcode;    /* last used error code (MPI-2) */
    //     int tag_ub;          /* Maximum message tag */
    //     int universe;        /* Universe size from mpiexec (MPI-2) */
    //     int wtime_is_global; /* Wtime is global over processes in COMM_WORLD */
    // } PreDefined_attrs;
#endif
    MPIR_Process.attrs.tag_ub = MPIDI_TAG_UB;

    /* obtain the id of the process group */
    PMICALL(PMI_KVS_Get_name_length_max(&pg_id_sz));

    pg_id_name = MPL_malloc(pg_id_sz + 1, MPL_MEM_STRINGS);
    if (!pg_id_name) {
        PRINTERROR("MPL_malloc()");
        goto fn_fail;
    }
    PMICALL(PMI_KVS_Get_my_name(pg_id_name, pg_id_sz)); /* safe */

    /* MPIDI_Process.socket = socket; */
    MPIDI_Process.my_pg_rank = pg_rank;
    MPIDI_Process.my_pg_size = pg_size;
    MPIDI_Process.pg_id_name = pg_id_name;

    if (!MPIDI_Process.env.enable_ondemand) {
        /* Create and establish all connections */
        if (InitPortConnections(socket) != MPI_SUCCESS) goto fn_fail;
    } else {
        /* Create all connections as "on demand" connections. */
        if (InitPscomConnections(socket) != MPI_SUCCESS) goto fn_fail;
    }

#ifdef MPID_PSP_TOPOLOGY_AWARE_COLLOPS
    {
        int grank;
        int my_node_id = -1;
        int remote_node_id = -1;
        int *node_id_table;

        if (MPIDI_Process.env.enable_msa_awareness &&
            MPIDI_Process.env.enable_msa_aware_collops) {
            my_node_id = MPIDI_Process.msa_module_id;
            assert(my_node_id > -1);
        } else if (MPIDI_Process.env.enable_smp_awareness &&
                   MPIDI_Process.env.enable_smp_aware_collops) {
            if (!MPIDI_Process.env.enable_ondemand) {
                /* In the PSP_ONDEMAND=0 case, we can just check the pscom
                   connection types: */
                for (grank = 0; grank < pg_size; grank++) {
                    pscom_connection_t *con = grank2con_get(grank);
                    if ((con->type == PSCOM_CON_TYPE_SHM) || (pg_rank == grank)) {
                        my_node_id = grank;
                        break;
                    }
                }
            } else {
                /* In the PSP_ONDEMAND=1 case, we have to use a hash of the
                   host name: */
                my_node_id = MPID_PSP_get_host_hash();
                if (my_node_id < 0) my_node_id *= -1;
            }
            assert(my_node_id > -1);
        } else {
            /* No hierarchy-awareness requested */
            assert(my_node_id == -1);
        }

        if (my_node_id > -1) {
            node_id_table = MPL_malloc(pg_size * sizeof(int), MPL_MEM_OBJECT);
            if (!node_id_table) {
                PRINTERROR("MPL_malloc()");
                goto fn_fail;
            }
            if (pg_rank != 0) {
                /* gather: */
                pscom_connection_t *con = grank2con_get(0);
                assert(con);
                pscom_send(con, NULL, 0, &my_node_id, sizeof(int));
                /* bcast: */
                rc = pscom_recv_from(con, NULL, 0, node_id_table, pg_size * sizeof(int));
                assert(rc == PSCOM_SUCCESS);
            } else {
                /* gather: */
                node_id_table[0] = my_node_id;
                for (grank = 1; grank < pg_size; grank++) {
                    pscom_connection_t *con = grank2con_get(grank);
                    assert(con);
                    rc = pscom_recv_from(con, NULL, 0, &remote_node_id, sizeof(int));
                    assert(rc == PSCOM_SUCCESS);
                    node_id_table[grank] = remote_node_id;
                }
                /* bcast: */
                for (grank = 1; grank < pg_size; grank++) {
                    pscom_connection_t *con = grank2con_get(grank);
                    pscom_send(con, NULL, 0, node_id_table, pg_size * sizeof(int));
                }
            }
            MPIDI_Process.node_id_table = node_id_table;
        } else {
            /* No hierarchy-awareness requested */
            assert(MPIDI_Process.node_id_table == NULL);
        }
    }
#endif

    /*
     * Initialize the MPI_COMM_WORLD object
     */
    {
        MPIR_Comm *comm;
        int grank;
        MPIDI_PG_t *pg_ptr;
        int pg_id_num;
        MPIDI_VCRT_t *vcrt;

        comm = MPIR_Process.comm_world;
        comm->rank = pg_rank;
        comm->remote_size = pg_size;
        comm->local_size = pg_size;
        comm->pscom_socket = socket;

        vcrt = MPIDI_VCRT_Create(comm->remote_size);
        assert(vcrt);
        MPID_PSP_comm_set_vcrt(comm, vcrt);

        MPIDI_PG_Convert_id(pg_id_name, &pg_id_num);
        MPIDI_PG_Create(pg_size, pg_id_num, &pg_ptr);
        assert(pg_ptr == MPIDI_Process.my_pg);

        for (grank = 0; grank < pg_size; grank++) {
            /* MPIR_CheckDisjointLpids() in mpi/comm/intercomm_create.c expects
               lpids to be smaller than 4096! Otherwise you will see a
               "Fatal error in MPI_Intercomm_create". */
            pscom_connection_t *con = grank2con_get(grank);
            pg_ptr->vcr[grank] = MPIDI_VC_Create(pg_ptr, grank, con, grank);
            comm->vcr[grank] = MPIDI_VC_Dup(pg_ptr->vcr[grank]);
        }

        mpi_errno = MPIR_Comm_commit(comm);
        assert(mpi_errno == MPI_SUCCESS);
    }

    /*
     * Initialize the MPI_COMM_SELF object
     */
    {
        MPIR_Comm *comm;
        MPIDI_VCRT_t *vcrt;

        comm = MPIR_Process.comm_self;
        comm->rank = 0;
        comm->remote_size = 1;
        comm->local_size = 1;
        comm->pscom_socket = socket;

        vcrt = MPIDI_VCRT_Create(comm->remote_size);
        assert(vcrt);
        MPID_PSP_comm_set_vcrt(comm, vcrt);

        comm->vcr[0] = MPIDI_VC_Dup(MPIR_Process.comm_world->vcr[pg_rank]);

        mpi_errno = MPIR_Comm_commit(comm);
        assert(mpi_errno == MPI_SUCCESS);
    }

    /* ToDo: move MPID_enable_receive_dispach to bg thread */
    MPID_enable_receive_dispach(socket);

    if (threadlevel_provided) {
        *threadlevel_provided = (MPICH_THREAD_LEVEL < threadlevel_requested) ?
            MPICH_THREAD_LEVEL : threadlevel_requested;
    }

    if (has_parent) {
        MPIR_Comm *comm;

        mpi_errno = MPID_PSP_GetParentPort(&parent_port);
        assert(mpi_errno == MPI_SUCCESS);

        /* printf("%s:%u:%s Child with Parent: %s\n",
                  __FILE__, __LINE__, __func__, parent_port); */

        mpi_errno = MPID_Comm_connect(parent_port, NULL, 0,
                                      MPIR_Process.comm_world, &comm);
        if (mpi_errno != MPI_SUCCESS) {
            fprintf(stderr, "MPI_Comm_connect(parent) failed!\n");
            goto fn_fail;
        }

        assert(comm != NULL);
        MPL_strncpy(comm->name, "MPI_COMM_PARENT", MPI_MAX_OBJECT_NAME);
        MPIR_Process.comm_parent = comm;
    }

    MPID_PSP_shm_rma_init();

fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_INIT);
    return mpi_errno;
    /* --- */
fn_fail:
    /* A failing MPI_Init() doesn't call the MPI error handler, which usually
       calls abort(). Instead, MPI_Init() would return mpi_errno, which nobody
       checks, causing segfaults, double frees and so on. To prevent strange
       error messages, we now call _exit(1) here. */
    _exit(1);
}
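/*
 * The node-id exchange inside MPID_Init() above is a hand-rolled allgather:
 * every rank sends one int to rank 0, which fills the table and sends it
 * back to everyone. Factored out as a sketch below, using pscom_send() and
 * pscom_recv_from() exactly as the code above does and assuming the same
 * surrounding psmpi context (grank2con_get() etc.); the function name and
 * the error returns are ours.
 */
static int exchange_ints_via_rank0(int pg_rank, int pg_size, int my_value, int *table)
{
    int grank;

    if (pg_rank != 0) {
        /* gather: send my value to rank 0; bcast: receive the full table */
        pscom_connection_t *con = grank2con_get(0);
        pscom_send(con, NULL, 0, &my_value, sizeof(int));
        return (pscom_recv_from(con, NULL, 0, table, pg_size * sizeof(int))
                == PSCOM_SUCCESS) ? 0 : -1;
    }

    /* rank 0: collect one int per rank ... */
    table[0] = my_value;
    for (grank = 1; grank < pg_size; grank++) {
        if (pscom_recv_from(grank2con_get(grank), NULL, 0,
                            &table[grank], sizeof(int)) != PSCOM_SUCCESS) {
            return -1;
        }
    }
    /* ... then redistribute the completed table */
    for (grank = 1; grank < pg_size; grank++) {
        pscom_send(grank2con_get(grank), NULL, 0, table, pg_size * sizeof(int));
    }
    return 0;
}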
static int mca_initialize_pmi_v1(void)
{
    PMI_BOOL initialized;
    int spawned;
    int rc, ret = OPAL_ERROR;

    if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
        OPAL_PMI_ERROR(rc, "PMI_Initialized");
        return OPAL_ERROR;
    }

    if (PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned))) {
        OPAL_PMI_ERROR(rc, "PMI_Init");
        return OPAL_ERROR;
    }

    // Initialize space demands
    rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_name_length_max(&pmi_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        goto err_exit;
    }

    // Initialize job environment information
    rc = PMI_Get_rank(&pmi_rank);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_Get_rank");
        goto err_exit;
    }

    rc = PMI_Get_universe_size(&pmi_usize);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
        goto err_exit;
    }

    rc = PMI_Get_size(&pmi_size);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_Get_size");
        goto err_exit;
    }

    rc = PMI_Get_appnum(&pmi_appnum);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_Get_appnum");
        goto err_exit;
    }

    pmi_kvs_name = (char*) malloc(pmi_kvslen_max);
    if (pmi_kvs_name == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }

    rc = PMI_KVS_Get_my_name(pmi_kvs_name, pmi_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        goto err_exit;
    }

    return OPAL_SUCCESS;

err_exit:
    PMI_Finalize();
    return ret;
}
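/*
 * Once mca_initialize_pmi_v1() succeeds, later code has pmi_kvs_name plus
 * the three cached length maxima to work with. An illustrative follow-up
 * that publishes one key under that KVS name; the function, key, and value
 * below are hypothetical, only the PMI calls and OPAL codes come from the
 * code above.
 */
#include <stdio.h>
#include <pmi.h>

static int publish_rank_marker(void)
{
    char key[64], val[32];

    /* be conservative if the launcher reports very small limits */
    if (pmi_keylen_max < (int) sizeof(key) || pmi_vallen_max < (int) sizeof(val)) {
        return OPAL_ERROR;
    }
    snprintf(key, sizeof(key), "marker-%d", pmi_rank);
    snprintf(val, sizeof(val), "%d", pmi_appnum);

    if (PMI_SUCCESS != PMI_KVS_Put(pmi_kvs_name, key, val)) return OPAL_ERROR;
    if (PMI_SUCCESS != PMI_KVS_Commit(pmi_kvs_name)) return OPAL_ERROR;
    return (PMI_SUCCESS == PMI_Barrier()) ? OPAL_SUCCESS : OPAL_ERROR;
}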