/* this function is not used in pmi2 */
/* Publish this process's hostname into the PMI KVS under the key
 * "hostname[<rank>]" so peers in the process group can discover node
 * locality.  Also fills in the global MPIU_hostname buffer as a side
 * effect.  Returns MPI_SUCCESS or an MPI error code. */
static int publish_node_id(MPIDI_PG_t *pg, int our_pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int ret;
    char *key;
    int key_max_sz;
    char *kvs_name;
    MPIU_CHKLMEM_DECL(1);

    /* set MPIU_hostname */
    ret = gethostname(MPIU_hostname, MAX_HOSTNAME_LEN);
    MPIR_ERR_CHKANDJUMP2(ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock_gethost",
                         "**sock_gethost %s %d", MPIU_Strerror(errno), errno);
    /* gethostname does not guarantee NUL termination on truncation */
    MPIU_hostname[MAX_HOSTNAME_LEN-1] = '\0';

    /* Allocate space for pmi key */
    pmi_errno = PMI_KVS_Get_key_length_max(&key_max_sz);
    /* test against PMI_SUCCESS explicitly, consistent with the other PMI
     * error checks in this function */
    MPIR_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                         "**fail", "**fail %d", pmi_errno);
    MPIU_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");

    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* Put my hostname id.  A singleton process group has no peers to
     * exchange with, so skip the put/commit/barrier round trip. */
    if (pg->size > 1)
    {
        memset(key, 0, key_max_sz);
        MPL_snprintf(key, key_max_sz, "hostname[%d]", our_pg_rank);

        pmi_errno = PMI_KVS_Put(kvs_name, key, MPIU_hostname);
        MPIR_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**pmi_kvs_put", "**pmi_kvs_put %d", pmi_errno);

        pmi_errno = PMI_KVS_Commit(kvs_name);
        MPIR_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**pmi_kvs_commit", "**pmi_kvs_commit %d", pmi_errno);

        /* barrier ensures all ranks' hostnames are visible before anyone
         * proceeds to read them */
        pmi_errno = PMI_Barrier();
        MPIR_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**pmi_barrier", "**pmi_barrier %d", pmi_errno);
    }

fn_exit:
    MPIU_CHKLMEM_FREEALL();
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
int MPIDI_CH3_GetParentPort(char ** parent_port) { int mpi_errno = MPI_SUCCESS; int pmi_errno; char val[MPIDI_MAX_KVS_VALUE_LEN]; if (parent_port_name == NULL) { char *kvsname = NULL; /* We can always use PMI_KVS_Get on our own process group */ MPIDI_PG_GetConnKVSname( &kvsname ); #ifdef USE_PMI2_API { int vallen = 0; MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX); pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, PARENT_PORT_KVSKEY, val, sizeof(val), &vallen); MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX); if (pmi_errno) MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_kvsget", "**pmi_kvsget %s", PARENT_PORT_KVSKEY); } #else MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX); pmi_errno = PMI_KVS_Get( kvsname, PARENT_PORT_KVSKEY, val, sizeof(val)); MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX); if (pmi_errno) { mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**pmi_kvsget", "**pmi_kvsget %d", pmi_errno); goto fn_exit; } #endif parent_port_name = MPL_strdup(val); if (parent_port_name == NULL) { MPIR_ERR_POP(mpi_errno); /* FIXME DARIUS */ } } *parent_port = parent_port_name; fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
/* Initialize the CH3 (MVAPICH2 RDMA) channel for process group pg.
 * Selects a connection-management mode (on-demand, all-to-all, or RDMA CM)
 * from the process-group size, environment variables, and build options;
 * starts the progress engine; sets up RDMA and/or SMP transports; and
 * finally fixes each VC's eager threshold.
 *
 * Parameters:
 *   has_parent - spawn indicator passed through by the device layer
 *                (not consulted in this function)
 *   pg         - process group being initialized
 *   pg_rank    - this process's rank within pg
 * Returns MPI_SUCCESS or an MPI error code (via the fn_fail path). */
int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t * pg, int pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int pg_size, threshold, dpm = 0, p;
    char *dpm_str, *value, *conn_info = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INIT);

    /* Sanity check: the packet-size lookup table must agree with the
     * compiled packet layout, otherwise header parsing would be corrupt. */
    if (MPIDI_CH3_Pkt_size_index[MPIDI_CH3_PKT_CLOSE] != sizeof (MPIDI_CH3_Pkt_close_t)) {
        MPIU_ERR_SETFATALANDJUMP1( mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s", "Failed sanity check! Packet size table mismatch");
    }

    pg_size = MPIDI_PG_Get_size(pg);

    /*Determine to use which connection management*/
    threshold = MPIDI_CH3I_CM_DEFAULT_ON_DEMAND_THRESHOLD;

    /*check ON_DEMAND_THRESHOLD*/
    value = getenv("MV2_ON_DEMAND_THRESHOLD");
    if (value) {
        threshold = atoi(value);
    }

    /* Dynamic process management support (MPI_Comm_spawn et al.) is
     * opt-in via MV2_SUPPORT_DPM; normalized to 0/1 with !! */
    dpm_str = getenv("MV2_SUPPORT_DPM");
    if (dpm_str) {
        dpm = !!atoi(dpm_str);
    }
    MPIDI_CH3I_Process.has_dpm = dpm;
    if(MPIDI_CH3I_Process.has_dpm) {
        /* DPM disables CPU affinity — presumably so spawned processes do
         * not inherit a pinned mapping; TODO confirm rationale */
        setenv("MV2_ENABLE_AFFINITY", "0", 1);
    }
#ifdef _ENABLE_XRC_
    value = getenv ("MV2_USE_XRC");
    if (value) {
        USE_XRC = atoi(value);
        if (USE_XRC) {
            /* Enable on-demand */
            threshold = 1;
        }
    }
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
    if ((value = getenv("MV2_HYBRID_ENABLE_THRESHOLD")) != NULL) {
        rdma_hybrid_enable_threshold = atoi(value);
    }
    if ((value = getenv("MV2_USE_UD_HYBRID")) != NULL) {
        rdma_enable_hybrid = atoi(value);
    }
    if (pg_size < rdma_hybrid_enable_threshold) {
        /* hybrid UD transport only pays off beyond the threshold job size */
        rdma_enable_hybrid = 0;
    }
#endif

    /* On-demand connections for large jobs, DPM, XRC, or UD-hybrid;
     * otherwise establish all connections up front (all-to-all). */
    if (pg_size > threshold || dpm
#ifdef _ENABLE_XRC_
        || USE_XRC
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
        || rdma_enable_hybrid
#endif
        ) {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;
        MPIDI_CH3I_Process.num_conn = 0;
    } else {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_BASIC_ALL2ALL;
    }

#if defined(RDMA_CM)
    /* RDMA CM / iWARP mode overrides the choice above, but is
     * incompatible with DPM */
    if (((value = getenv("MV2_USE_RDMA_CM")) != NULL
            || (value = getenv("MV2_USE_IWARP_MODE")) != NULL)
        && atoi(value) && ! dpm) {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_RDMA_CM;
    } else {
        rdma_cm_get_hca_type(&MPIDI_CH3I_RDMA_Process);
    }
#endif /* defined(RDMA_CM) */

    MPIDI_PG_GetConnKVSname(&pg->ch.kvs_name);

#if defined(CKPT)
#if defined(RDMA_CM)
    /* checkpointing cannot be combined with RDMA CM — fail fast */
    if (MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_RDMA_CM) {
        MPIU_Error_printf("Error: Checkpointing does not work with RDMA CM.\n"
            "Please configure and compile MVAPICH2 with checkpointing disabled "
            "or without support for RDMA CM.\n");
        MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
    }
#endif /* defined(RDMA_CM) */
    // Always use CM_ON_DEMAND for Checkpoint/Restart and Migration
    MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;
#endif /* defined(CKPT) */

#ifdef _ENABLE_UD_
    if (rdma_enable_hybrid) {
        /* hybrid UD requires the on-demand CM selected above */
        MPIU_Assert(MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_ON_DEMAND);
    }
#endif

    /* save my vc_ptr for easy access */
    MPIDI_PG_Get_vc(pg, pg_rank, &MPIDI_CH3I_Process.vc);

    /* Initialize Progress Engine */
    if ((mpi_errno = MPIDI_CH3I_Progress_init())) {
        MPIU_ERR_POP(mpi_errno);
    }

    /* Check for SMP only */
    MPIDI_CH3I_set_smp_only();

    if (!SMP_ONLY) {
        switch (MPIDI_CH3I_Process.cm_type) {
            /* allocate rmda memory and set up the queues */
            case MPIDI_CH3I_CM_ON_DEMAND:
#if defined(RDMA_CM)
            case MPIDI_CH3I_CM_RDMA_CM:
#endif /* defined(RDMA_CM) */
                /* conn_info is allocated by CM_Init and freed below */
                mpi_errno = MPIDI_CH3I_CM_Init(pg, pg_rank, &conn_info);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIU_ERR_POP(mpi_errno);
                }
                break;
            default:
                /*call old init to setup all connections*/
                if ((mpi_errno = MPIDI_CH3I_RDMA_init(pg, pg_rank)) != MPI_SUCCESS) {
                    MPIU_ERR_POP(mpi_errno);
                }
                /* All vc should be connected */
                for (p = 0; p < pg_size; ++p) {
                    MPIDI_PG_Get_vc(pg, p, &vc);
                    vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                }
                break;
        }
    }

#if defined(CKPT)
#if defined(DISABLE_PTMALLOC)
    /* checkpointing also requires registration caching (ptmalloc) */
    MPIU_Error_printf("Error: Checkpointing does not work without registration "
        "caching enabled.\nPlease configure and compile MVAPICH2 without checkpointing "
        " or enable registration caching.\n");
    MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
#endif /* defined(DISABLE_PTMALLOC) */
    if ((mpi_errno = MPIDI_CH3I_CR_Init(pg, pg_rank, pg_size))) {
        MPIU_ERR_POP(mpi_errno);
    }
#endif /* defined(CKPT) */

    /* set connection info for dynamic process management */
    if (conn_info && dpm) {
        mpi_errno = MPIDI_PG_SetConnInfo(pg_rank, (const char *)conn_info);
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_POP(mpi_errno);
        }
    }
    MPIU_Free(conn_info);

    /* Initialize the smp channel */
    if ((mpi_errno = MPIDI_CH3I_SMP_init(pg))) {
        MPIU_ERR_POP(mpi_errno);
    }

    if (SMP_INIT) {
        for (p = 0; p < pg_size; ++p) {
            MPIDI_PG_Get_vc(pg, p, &vc);
            /* Mark the SMP VC as Idle */
            if (vc->smp.local_nodes >= 0) {
                vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                if (SMP_ONLY) {
                    MPIDI_CH3I_SMP_Init_VC(vc);
                }
#ifdef _ENABLE_XRC_
                VC_XST_SET (vc, XF_SMP_VC);
#endif
            }
        }
    } else {
        /* SMP init failed/disabled: also turn off shared-memory collectives */
        extern int enable_shmem_collectives;
        enable_shmem_collectives = SMP_INIT;
    }

    /* Set the eager max msg size now that we know SMP and RDMA are initialized.
     * The max message size is also set during VC initialization, but the state
     * of SMP is unknown at that time. */
    for (p = 0; p < pg_size; ++p) {
        MPIDI_PG_Get_vc(pg, p, &vc);
        vc->eager_max_msg_sz = MPIDI_CH3_EAGER_MAX_MSG_SIZE(vc);
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INIT);
    return mpi_errno;

fn_fail:
    goto fn_exit;
}
/* Query the process manager (via the PMI KVS key "PMI_dead_processes")
 * for the current set of failed processes, update the global
 * MPIDI_Failed_procs_group, and notify the communicator layer and VC
 * machinery about any processes that failed since the previous call.
 * Returns MPI_SUCCESS or an MPI error code. */
int MPIDI_CH3U_Check_for_failed_procs(void)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int len;
    char *kvsname;
    MPIR_Group *prev_failed_group, *new_failed_group;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    /* FIXME: Currently this only handles failed processes in
       comm_world.  We need to fix hydra to include the pgid along
       with the rank, then we need to create the failed group from
       something bigger than comm_world. */
    mpi_errno = MPIDI_PG_GetConnKVSname(&kvsname);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

#ifdef USE_PMI2_API
    {
        int vallen = 0;
        pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes",
                                 MPIDI_failed_procs_string, PMI2_MAX_VALLEN, &vallen);
        MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
    }
#else
    /* use the PMI-reported maximum value length as the read bound */
    pmi_errno = PMI_KVS_Get_value_length_max(&len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max");
    pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", MPIDI_failed_procs_string, len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
#endif

    if (*MPIDI_failed_procs_string == '\0') {
        /* there are no failed processes */
        MPIDI_Failed_procs_group = MPIR_Group_empty;
        goto fn_exit;
    }

    MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER, TYPICAL, "Received proc fail notification: %s",
                  MPIDI_failed_procs_string);

    /* save reference to previous group so we can identify new failures */
    prev_failed_group = MPIDI_Failed_procs_group;

    /* Parse the list of failed processes */
    MPIDI_CH3U_Get_failed_group(-2, &MPIDI_Failed_procs_group);

    /* get group of newly failed processes */
    mpi_errno = MPIR_Group_difference_impl(MPIDI_Failed_procs_group, prev_failed_group,
                                           &new_failed_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    if (new_failed_group != MPIR_Group_empty) {
        /* tell the communicator layer, then tear down the VCs of the
         * newly failed processes */
        mpi_errno = MPIDI_CH3I_Comm_handle_failed_procs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = terminate_failed_VCs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_release(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    /* free prev group */
    if (prev_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIR_Group_release(prev_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
    return mpi_errno;
fn_oom: /* out-of-memory handler for utarray operations */
    /* NOTE(review): no utarray call is visible in this function; this label
     * is presumably jumped to from a utarray oom macro expansion — verify
     * before removing */
    MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
fn_fail:
    goto fn_exit;
}