                        MXM_VERNO_MAJOR, MXM_VERNO_MINOR,
                        (cur_ver >> MXM_MAJOR_BIT) & 0xff,
                        (cur_ver >> MXM_MINOR_BIT) & 0xff));
    }

    _mxm_obj.compiletime_version = MXM_VERNO_STRING;
#if MXM_API >= MXM_VERSION(3,0)
    _mxm_obj.runtime_version = MPL_strdup(mxm_get_version_string());
#else
    _mxm_obj.runtime_version = MPL_malloc(sizeof(MXM_VERNO_STRING) + 10, MPL_MEM_STRINGS);
    snprintf(_mxm_obj.runtime_version, (sizeof(MXM_VERNO_STRING) + 9),
             "%ld.%ld",
             (cur_ver >> MXM_MAJOR_BIT) & 0xff,
             (cur_ver >> MXM_MINOR_BIT) & 0xff);
#endif

    _mxm_obj.conf.bulk_connect    = cur_ver < MXM_VERSION(3, 2) ? 0 : MPIR_CVAR_NEMESIS_MXM_BULK_CONNECT;
    _mxm_obj.conf.bulk_disconnect = cur_ver < MXM_VERSION(3, 2) ? 0 : MPIR_CVAR_NEMESIS_MXM_BULK_DISCONNECT;

    if (cur_ver < MXM_VERSION(3, 2) &&
        (_mxm_obj.conf.bulk_connect || _mxm_obj.conf.bulk_disconnect)) {
        _mxm_obj.conf.bulk_connect = 0;
        _mxm_obj.conf.bulk_disconnect = 0;
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                        (MPL_DBG_FDEST,
                         "WARNING: MPICH is running with MXM version %s, which is older than 3.2; "
                         "bulk connect/disconnect cannot work properly and will be turned off.",
                         _mxm_obj.runtime_version));
    }

  fn_exit:
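/*
 * ompi_mtl_mxm_add_procs() below establishes MXM connections to every peer
 * process.  With MXM < 2.0, the endpoint addresses received from the modex
 * are collected into an array of mxm_conn_req_t and connected in one bulk
 * mxm_ep_connect() call; with MXM >= 2.0, each peer endpoint is connected
 * individually as its address arrives.
 */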
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
                           struct ompi_proc_t** procs,
                           /*const*/ struct mca_mtl_base_endpoint_t **mtl_peer_data)
{
#if MXM_API < MXM_VERSION(2,0)
    ompi_mtl_mxm_ep_conn_info_t *ep_info;
    mxm_conn_req_t *conn_reqs;
    int timeout;
#endif
    void *ep_address;
    size_t ep_address_len;
    mxm_error_t err;
    size_t i;
    int rc;

    assert(mtl == &ompi_mtl_mxm.super);

#if MXM_API < MXM_VERSION(2,0)
    /* Allocate connection requests */
    conn_reqs = calloc(nprocs, sizeof(mxm_conn_req_t));
    ep_info   = calloc(nprocs, sizeof(ompi_mtl_mxm_ep_conn_info_t));
    if (NULL == conn_reqs || NULL == ep_info) {
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto bail;
    }
#endif

    /* Get the EP connection requests for all the processes from modex */
    for (i = 0; i < nprocs; ++i) {
        rc = ompi_mtl_mxm_recv_ep_address(procs[i], &ep_address, &ep_address_len);
        if (rc != OMPI_SUCCESS) {
            goto bail;
        }

#if MXM_API < MXM_VERSION(2,0)
        if (ep_address_len != sizeof(ep_info[i])) {
            MXM_ERROR("Invalid endpoint address length");
            rc = OMPI_ERROR;
            goto bail;
        }

        memcpy(&ep_info[i], ep_address, ep_address_len);
        conn_reqs[i].ptl_addr[MXM_PTL_SELF] =
            (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
        conn_reqs[i].ptl_addr[MXM_PTL_SHM] =
            (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
        conn_reqs[i].ptl_addr[MXM_PTL_RDMA] =
            (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
#else
        mtl_peer_data[i] = (mca_mtl_mxm_endpoint_t *) OBJ_NEW(mca_mtl_mxm_endpoint_t);
        mtl_peer_data[i]->mtl_mxm_module = &ompi_mtl_mxm;
        err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &mtl_peer_data[i]->mxm_conn);
        if (err != MXM_OK) {
            MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
            rc = OMPI_ERROR;
            goto bail;
        }
#endif
        free(ep_address);
    }

#if MXM_API < MXM_VERSION(2,0)
    /* Connect to remote peers */
    timeout = (mxm_get_version() < MXM_VERSION(1,5)) ? 1000 : -1;
    err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, timeout);
    if (MXM_OK != err) {
        MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
        for (i = 0; i < nprocs; ++i) {
            if (MXM_OK != conn_reqs[i].error) {
                MXM_ERROR("MXM EP connect to %s error: %s\n",
                          procs[i]->proc_hostname,
                          mxm_error_string(conn_reqs[i].error));
            }
        }
        rc = OMPI_ERROR;
        goto bail;
    }

    /* Save returned connections */
    for (i = 0; i < nprocs; ++i) {
        mtl_peer_data[i] = (mca_mtl_mxm_endpoint_t *) OBJ_NEW(mca_mtl_mxm_endpoint_t);
        mtl_peer_data[i]->mtl_mxm_module = &ompi_mtl_mxm;
        mtl_peer_data[i]->mxm_conn = conn_reqs[i].conn;
    }
#endif

    rc = OMPI_SUCCESS;

bail:
#if MXM_API < MXM_VERSION(2,0)
    free(conn_reqs);
    free(ep_info);
#endif
    return rc;
}
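/*
 * ompi_mtl_mxm_component_register() below exposes the component's tunables
 * (verbose, np, priority, and on MXM >= 3.1 bulk_connect/bulk_disconnect)
 * through the MCA variable system.  Following the usual MCA naming
 * convention (<framework>_<component>_<variable>), these would be settable
 * as e.g. "--mca mtl_mxm_np 0" on the mpirun command line; the exact CLI
 * spelling is an assumption, not quoted from the source.
 */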
static int ompi_mtl_mxm_component_register(void)
{
    mca_base_component_t *c;

    c = &mca_mtl_mxm_component.super.mtl_version;

    ompi_mtl_mxm.verbose = 0;
    (void) mca_base_component_var_register(c, "verbose",
                                           "Verbose level of the MXM component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &ompi_mtl_mxm.verbose);

#if MXM_API > MXM_VERSION(2,0)
    ompi_mtl_mxm.mxm_np = 0;
#else
    ompi_mtl_mxm.mxm_np = 128;
#endif
    (void) mca_base_component_var_register(c, "np",
                                           "[integer] Minimal number of MPI processes in a single job "
                                           "required to activate the MXM transport",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &ompi_mtl_mxm.mxm_np);

    param_priority = 30;
    (void) mca_base_component_var_register(c, "priority",
                                           "Priority of the MXM MTL component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &param_priority);

#if MXM_API >= MXM_VERSION(3,1)
    {
        unsigned long cur_ver = mxm_get_version();

        ompi_mtl_mxm.bulk_connect = 0;
        if (cur_ver < MXM_VERSION(3,2)) {
            ompi_mtl_mxm.bulk_disconnect = 0;
        } else {
            ompi_mtl_mxm.bulk_disconnect = 1;
        }
        (void) mca_base_component_var_register(c, "bulk_connect",
                                               "[integer] use bulk connect",
                                               MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                               OPAL_INFO_LVL_9,
                                               MCA_BASE_VAR_SCOPE_READONLY,
                                               &ompi_mtl_mxm.bulk_connect);
        (void) mca_base_component_var_register(c, "bulk_disconnect",
                                               "[integer] use bulk disconnect",
                                               MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                               OPAL_INFO_LVL_9,
                                               MCA_BASE_VAR_SCOPE_READONLY,
                                               &ompi_mtl_mxm.bulk_disconnect);

        if (cur_ver < MXM_VERSION(3,2) &&
            (ompi_mtl_mxm.bulk_connect || ompi_mtl_mxm.bulk_disconnect)) {
            ompi_mtl_mxm.bulk_connect = 0;
            ompi_mtl_mxm.bulk_disconnect = 0;
            MXM_VERBOSE(1,
                        "WARNING: OMPI is running with MXM version %s, which is older than 3.2; "
                        "bulk connect/disconnect cannot work properly and will be turned off.",
                        ompi_mtl_mxm.runtime_version);
        }
    }
#endif

    return OMPI_SUCCESS;
}
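/*
 * mca_spml_ikrit_add_procs() below is the OpenSHMEM analogue of the MTL
 * add_procs above: every rank publishes its MXM endpoint address via
 * oshmem_shmem_allgather(), then connects to all peers.  Each rank walks
 * the peer list starting from its own index, so the per-rank connection
 * lists end up in a different order on every rank.  Local peers are then
 * switched to the shared-memory transport.
 */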
int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t *ep_hw_rdma_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t my_ep_info = {{0}};
#if MXM_API < MXM_VERSION(2,0)
    mxm_conn_req_t *conn_reqs;
    int timeout;
#else
    size_t mxm_addr_len = MXM_MAX_ADDR_LEN;
#endif
    mxm_error_t err;
    size_t i, n;
    int rc = OSHMEM_ERROR;
    ompi_proc_t *proc_self;
    int my_rank = oshmem_my_proc_id();

    OBJ_CONSTRUCT(&mca_spml_ikrit.active_peers, opal_list_t);

    /* Allocate connection requests */
#if MXM_API < MXM_VERSION(2,0)
    conn_reqs = malloc(nprocs * sizeof(mxm_conn_req_t));
    if (NULL == conn_reqs) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }
    /* Zero all nprocs requests, not just the first one */
    memset(conn_reqs, 0x0, nprocs * sizeof(mxm_conn_req_t));
#endif
    ep_info = calloc(nprocs, sizeof(spml_ikrit_mxm_ep_conn_info_t));
    if (NULL == ep_info) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    if (mca_spml_ikrit.hw_rdma_channel) {
        ep_hw_rdma_info = calloc(nprocs, sizeof(spml_ikrit_mxm_ep_conn_info_t));
        if (NULL == ep_hw_rdma_info) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            goto bail;
        }
    }

    mca_spml_ikrit.mxm_peers =
        (mxm_peer_t **) malloc(nprocs * sizeof(*(mca_spml_ikrit.mxm_peers)));
    if (NULL == mca_spml_ikrit.mxm_peers) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

#if MXM_API < MXM_VERSION(2,0)
    if (OSHMEM_SUCCESS != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_SELF)) {
        rc = OSHMEM_ERROR;
        goto bail;
    }
    if (OSHMEM_SUCCESS != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_RDMA)) {
        rc = OSHMEM_ERROR;
        goto bail;
    }
#else
    if (mca_spml_ikrit.hw_rdma_channel) {
        err = mxm_ep_get_address(mca_spml_ikrit.mxm_hw_rdma_ep,
                                 &my_ep_info.addr.ep_addr, &mxm_addr_len);
        if (MXM_OK != err) {
            orte_show_help("help-oshmem-spml-ikrit.txt",
                           "unable to get endpoint address", true,
                           mxm_error_string(err));
            rc = OSHMEM_ERROR;
            goto bail;
        }
        oshmem_shmem_allgather(&my_ep_info, ep_hw_rdma_info,
                               sizeof(spml_ikrit_mxm_ep_conn_info_t));
    }
    err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep,
                             &my_ep_info.addr.ep_addr, &mxm_addr_len);
    if (MXM_OK != err) {
        orte_show_help("help-oshmem-spml-ikrit.txt",
                       "unable to get endpoint address", true,
                       mxm_error_string(err));
        rc = OSHMEM_ERROR;
        goto bail;
    }
#endif
    oshmem_shmem_allgather(&my_ep_info, ep_info,
                           sizeof(spml_ikrit_mxm_ep_conn_info_t));

    opal_progress_register(spml_ikrit_progress);

    /* Get the EP connection requests for all the processes from modex */
    for (n = 0; n < nprocs; ++n) {
        /* MXM 2.0 keeps its connections on a list; make sure that list
         * has a different order on every rank */
        i = (my_rank + n) % nprocs;
        mca_spml_ikrit.mxm_peers[i] = OBJ_NEW(mxm_peer_t);
        if (NULL == mca_spml_ikrit.mxm_peers[i]) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            goto bail;
        }
        mca_spml_ikrit.mxm_peers[i]->pe = i;

#if MXM_API < MXM_VERSION(2,0)
        conn_reqs[i].ptl_addr[MXM_PTL_SELF] =
            (struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_SELF];
        conn_reqs[i].ptl_addr[MXM_PTL_SHM] = NULL;
        conn_reqs[i].ptl_addr[MXM_PTL_RDMA] =
            (struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_RDMA];
#else
        err = mxm_ep_connect(mca_spml_ikrit.mxm_ep,
                             ep_info[i].addr.ep_addr,
                             &mca_spml_ikrit.mxm_peers[i]->mxm_conn);
        if (MXM_OK != err) {
            SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
            goto bail;
        }
        if (OSHMEM_SUCCESS != create_ptl_idx(i))
            goto bail;
        mxm_conn_ctx_set(mca_spml_ikrit.mxm_peers[i]->mxm_conn,
                         mca_spml_ikrit.mxm_peers[i]);
        if (mca_spml_ikrit.hw_rdma_channel) {
            err = mxm_ep_connect(mca_spml_ikrit.mxm_hw_rdma_ep,
                                 ep_hw_rdma_info[i].addr.ep_addr,
                                 &mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn);
            if (MXM_OK != err) {
                SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
                goto bail;
            }
        } else {
            mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn =
                mca_spml_ikrit.mxm_peers[i]->mxm_conn;
        }
#endif
    }

#if MXM_API < MXM_VERSION(2,0)
    /* Connect to remote peers */
    if (mxm_get_version() < MXM_VERSION(1,5)) {
        timeout = 1000;
    } else {
        timeout = -1;
    }
    err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, conn_reqs, nprocs, timeout);
    if (MXM_OK != err) {
        SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
        for (i = 0; i < nprocs; ++i) {
            if (MXM_OK != conn_reqs[i].error) {
                SPML_ERROR("MXM EP connect to %s error: %s\n",
                           procs[i]->proc_hostname,
                           mxm_error_string(conn_reqs[i].error));
            }
        }
        rc = OSHMEM_ERR_CONNECTION_FAILED;
        goto bail;
    }

    /* Save returned connections */
    for (i = 0; i < nprocs; ++i) {
        mca_spml_ikrit.mxm_peers[i]->mxm_conn = conn_reqs[i].conn;
        if (OSHMEM_SUCCESS != create_ptl_idx(i)) {
            rc = OSHMEM_ERR_CONNECTION_FAILED;
            goto bail;
        }
        mxm_conn_ctx_set(conn_reqs[i].conn, mca_spml_ikrit.mxm_peers[i]);
    }

    if (conn_reqs)
        free(conn_reqs);
#endif
    if (ep_info)
        free(ep_info);
    if (ep_hw_rdma_info)
        free(ep_hw_rdma_info);

#if MXM_API >= MXM_VERSION(2,0)
    if (mca_spml_ikrit.bulk_connect) {
        /* Need a barrier to ensure remote peers have already created their connections */
        oshmem_shmem_barrier();
        mxm_ep_wireup(mca_spml_ikrit.mxm_ep);
    }
#endif

    proc_self = oshmem_proc_group_find(oshmem_group_all, my_rank);
    /* Identify local processes and switch their transport to SHM */
    for (i = 0; i < nprocs; i++) {
        if (procs[i]->super.proc_name.jobid != proc_self->super.proc_name.jobid ||
            !OPAL_PROC_ON_LOCAL_NODE(procs[i]->super.proc_flags)) {
            continue;
        }
        if (procs[i] == proc_self)
            continue;

        /* Use zcopy for put/get via SysV shared memory */
        OSHMEM_PROC_DATA(procs[i])->transport_ids[0] = MXM_PTL_SHM;
        OSHMEM_PROC_DATA(procs[i])->transport_ids[1] = MXM_PTL_RDMA;
        OSHMEM_PROC_DATA(procs[i])->num_transports = 2;
    }

    SPML_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

bail:
#if MXM_API < MXM_VERSION(2,0)
    if (conn_reqs)
        free(conn_reqs);
#endif
    if (ep_info)
        free(ep_info);
    if (ep_hw_rdma_info)
        free(ep_hw_rdma_info);
    SPML_ERROR("add procs FAILED rc=%d", rc);
    return rc;
}
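/*
 * The MPICH nemesis/MXM initialization fragment below records the
 * compile-time and run-time MXM versions, then forces bulk
 * connect/disconnect off when the runtime library is older than 3.2,
 * mirroring the version checks in the components above.
 */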
"WARNING: MPICH was compiled with MXM version %d.%d but version %ld.%ld detected.", MXM_VERNO_MAJOR, MXM_VERNO_MINOR, (cur_ver >> MXM_MAJOR_BIT) & 0xff, (cur_ver >> MXM_MINOR_BIT) & 0xff)); } _mxm_obj.compiletime_version = MXM_VERNO_STRING; #if MXM_API >= MXM_VERSION(3,0) _mxm_obj.runtime_version = MPIU_Strdup(mxm_get_version_string()); #else _mxm_obj.runtime_version = MPIU_Malloc(sizeof(MXM_VERNO_STRING) + 10); snprintf(_mxm_obj.runtime_version, (sizeof(MXM_VERNO_STRING) + 9), "%ld.%ld", (cur_ver >> MXM_MAJOR_BIT) & 0xff, (cur_ver >> MXM_MINOR_BIT) & 0xff); #endif _mxm_obj.conf.bulk_connect = cur_ver < MXM_VERSION(3, 2) ? 0 : MPIR_CVAR_NEMESIS_MXM_BULK_CONNECT; _mxm_obj.conf.bulk_disconnect = cur_ver < MXM_VERSION(3, 2) ? 0 : MPIR_CVAR_NEMESIS_MXM_BULK_DISCONNECT; if (cur_ver < MXM_VERSION(3, 2) && (_mxm_obj.conf.bulk_connect || _mxm_obj.conf.bulk_disconnect)) { _mxm_obj.conf.bulk_connect = 0; _mxm_obj.conf.bulk_disconnect = 0; MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "WARNING: MPICH runs with %s version of MXM that is less than 3.2, " "so bulk connect/disconnect cannot work properly and will be turn off.", _mxm_obj.runtime_version)); } fn_exit: return mpi_errno;