int mca_pml_yalla_add_procs(struct ompi_proc_t **procs, size_t nprocs) { size_t i; int ret; void *address; mxm_conn_h conn; size_t addrlen; mxm_error_t error; if (OMPI_SUCCESS != (ret = mca_pml_base_pml_check_selected("yalla", procs, nprocs))) { return ret; } for (i = 0; i < nprocs; ++i) { ret = recv_ep_address(procs[i], &address, &addrlen); if (ret < 0) { return ret; } if (procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]) { PML_YALLA_VERBOSE(3, "already connected to proc. %s", OPAL_NAME_PRINT(procs[i]->super.proc_name)); continue; } PML_YALLA_VERBOSE(2, "connecting to proc. %s", OPAL_NAME_PRINT(procs[i]->super.proc_name)); error = mxm_ep_connect(ompi_pml_yalla.mxm_ep, address, &conn); free(address); if (MXM_OK != error) { PML_YALLA_ERROR("Failed to connect"); return OMPI_ERROR; } procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = conn; } return OMPI_SUCCESS; }
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t** procs) { #if MXM_API < MXM_VERSION(2,0) ompi_mtl_mxm_ep_conn_info_t *ep_info; mxm_conn_req_t *conn_reqs; size_t ep_index = 0; #endif void *ep_address; size_t ep_address_len; mxm_error_t err; size_t i; int rc; mca_mtl_mxm_endpoint_t *endpoint; assert(mtl == &ompi_mtl_mxm.super); #if MXM_API < MXM_VERSION(2,0) /* Allocate connection requests */ conn_reqs = calloc(nprocs, sizeof(mxm_conn_req_t)); ep_info = calloc(nprocs, sizeof(ompi_mtl_mxm_ep_conn_info_t)); if (NULL == conn_reqs || NULL == ep_info) { rc = OMPI_ERR_OUT_OF_RESOURCE; goto bail; } #endif /* Get the EP connection requests for all the processes from modex */ for (i = 0; i < nprocs; ++i) { if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) { continue; /* already connected to this endpoint */ } rc = ompi_mtl_mxm_recv_ep_address(procs[i], &ep_address, &ep_address_len); if (rc != OMPI_SUCCESS) { goto bail; } #if MXM_API < MXM_VERSION(2,0) if (ep_address_len != sizeof(ep_info[i])) { MXM_ERROR("Invalid endpoint address length"); rc = OMPI_ERROR; goto bail; } memcpy(&ep_info[i], ep_address, ep_address_len); conn_reqs[ep_index].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]); conn_reqs[ep_index].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]); conn_reqs[ep_index].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]); ep_index++; #else endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t); endpoint->mtl_mxm_module = &ompi_mtl_mxm; err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &endpoint->mxm_conn); if (err != MXM_OK) { MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); rc = OMPI_ERROR; goto bail; } procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint; #endif free(ep_address); } #if MXM_API < MXM_VERSION(2,0) /* Connect to remote peers */ err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, 
ep_index, -1); if (MXM_OK != err) { MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); for (i = 0; i < ep_index; ++i) { if (MXM_OK != conn_reqs[i].error) { MXM_ERROR("MXM EP connect to %s error: %s\n", (NULL == procs[i]->proc_hostname) ? "unknown" : procs[i]->proc_hostname, mxm_error_string(conn_reqs[i].error)); } } rc = OMPI_ERROR; goto bail; } /* Save returned connections */ for (i = 0; i < ep_index; ++i) { endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t); endpoint->mtl_mxm_module = &ompi_mtl_mxm; endpoint->mxm_conn = conn_reqs[i].conn; procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint; } #endif rc = OMPI_SUCCESS; bail: #if MXM_API < MXM_VERSION(2,0) free(conn_reqs); free(ep_info); #endif return rc; }
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t** procs, /*const*/ struct mca_mtl_base_endpoint_t **mtl_peer_data) { #if MXM_API < MXM_VERSION(2,0) ompi_mtl_mxm_ep_conn_info_t *ep_info; mxm_conn_req_t *conn_reqs; int timeout; #endif void *ep_address; size_t ep_address_len; mxm_error_t err; size_t i; int rc; assert(mtl == &ompi_mtl_mxm.super); #if MXM_API < MXM_VERSION(2,0) /* Allocate connection requests */ conn_reqs = calloc(nprocs, sizeof(mxm_conn_req_t)); ep_info = calloc(nprocs, sizeof(ompi_mtl_mxm_ep_conn_info_t)); if (NULL == conn_reqs || NULL == ep_info) { rc = OMPI_ERR_OUT_OF_RESOURCE; goto bail; } #endif /* Get the EP connection requests for all the processes from modex */ for (i = 0; i < nprocs; ++i) { rc = ompi_mtl_mxm_recv_ep_address(procs[i], &ep_address, &ep_address_len); if (rc != OMPI_SUCCESS) { goto bail; } #if MXM_API < MXM_VERSION(2,0) if (ep_address_len != sizeof(ep_info[i])) { MXM_ERROR("Invalid endpoint address length"); rc = OMPI_ERROR; goto bail; } memcpy(&ep_info[i], ep_address, ep_address_len); conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]); conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]); conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]); #else mtl_peer_data[i] = (mca_mtl_mxm_endpoint_t *) OBJ_NEW(mca_mtl_mxm_endpoint_t); mtl_peer_data[i]->mtl_mxm_module = &ompi_mtl_mxm; err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &mtl_peer_data[i]->mxm_conn); if (err != MXM_OK) { MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); rc = OMPI_ERROR; goto bail; } #endif free(ep_address); } #if MXM_API < MXM_VERSION(2,0) /* Connect to remote peers */ timeout = (mxm_get_version() < MXM_VERSION(1,5)) ? 
1000 : -1; err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, timeout); if (MXM_OK != err) { MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); for (i = 0; i < nprocs; ++i) { if (MXM_OK != conn_reqs[i].error) { MXM_ERROR("MXM EP connect to %s error: %s\n", procs[i]->proc_hostname, mxm_error_string(conn_reqs[i].error)); } } rc = OMPI_ERROR; goto bail; } /* Save returned connections */ for (i = 0; i < nprocs; ++i) { mtl_peer_data[i] = (mca_mtl_mxm_endpoint_t *) OBJ_NEW(mca_mtl_mxm_endpoint_t); mtl_peer_data[i]->mtl_mxm_module = &ompi_mtl_mxm; mtl_peer_data[i]->mxm_conn = conn_reqs[i].conn; } #endif rc = OMPI_SUCCESS; bail: #if MXM_API < MXM_VERSION(2,0) free(conn_reqs); free(ep_info); #endif return rc; }
/**
 * Exchange MXM endpoint addresses with all PEs and connect to each of them.
 *
 * Steps performed:
 *  1. allocate the address exchange buffers and the mxm_peers array;
 *  2. publish this PE's endpoint address(es) with oshmem_shmem_allgather()
 *     (a second exchange is done for the HW RDMA endpoint when
 *     mca_spml_ikrit.hw_rdma_channel is set);
 *  3. register spml_ikrit_progress and connect to every peer, starting at
 *     this PE's own rank so that each rank connects in a different order;
 *  4. optionally barrier + mxm_ep_wireup() when bulk_connect is set;
 *  5. mark peers of the same job on the local node as using MXM_PTL_SHM.
 *
 * @param procs   array of process descriptors, one per PE
 * @param nprocs  number of entries in @p procs
 *
 * @return OSHMEM_SUCCESS, OSHMEM_ERR_OUT_OF_RESOURCE on allocation failure,
 *         or OSHMEM_ERROR if an endpoint address cannot be obtained or a
 *         connect fails.
 */
int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t *ep_hw_rdma_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t my_ep_info;
    size_t mxm_addr_len = MXM_MAX_ADDR_LEN;
    mxm_error_t err;
    size_t i, n;
    int rc = OSHMEM_ERROR;
    ompi_proc_t *proc_self;
    int my_rank = oshmem_my_proc_id();

    OBJ_CONSTRUCT(&mca_spml_ikrit.active_peers, opal_list_t);

    /* Allocate connection requests */
    ep_info = calloc(nprocs, sizeof(spml_ikrit_mxm_ep_conn_info_t));
    if (NULL == ep_info) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    if (mca_spml_ikrit.hw_rdma_channel) {
        ep_hw_rdma_info = calloc(nprocs, sizeof(spml_ikrit_mxm_ep_conn_info_t));
        if (NULL == ep_hw_rdma_info) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            goto bail;
        }
    }

    mca_spml_ikrit.mxm_peers = calloc(nprocs, sizeof(mxm_peer_t));
    if (NULL == mca_spml_ikrit.mxm_peers) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    memset(&my_ep_info, 0, sizeof(my_ep_info));

    if (mca_spml_ikrit.hw_rdma_channel) {
        err = mxm_ep_get_address(mca_spml_ikrit.mxm_hw_rdma_ep,
                                 &my_ep_info.addr.ep_addr, &mxm_addr_len);
        if (MXM_OK != err) {
            orte_show_help("help-oshmem-spml-ikrit.txt",
                           "unable to get endpoint address", true,
                           mxm_error_string(err));
            rc = OSHMEM_ERROR;
            goto bail;
        }
        oshmem_shmem_allgather(&my_ep_info, ep_hw_rdma_info,
                               sizeof(spml_ikrit_mxm_ep_conn_info_t));
    }

    /* mxm_addr_len is passed by pointer, so the call above may have
     * overwritten it (presumably with the actual address length - confirm
     * against the MXM headers). Restore the full capacity before reusing it. */
    mxm_addr_len = MXM_MAX_ADDR_LEN;
    err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep,
                             &my_ep_info.addr.ep_addr, &mxm_addr_len);
    if (MXM_OK != err) {
        orte_show_help("help-oshmem-spml-ikrit.txt",
                       "unable to get endpoint address", true,
                       mxm_error_string(err));
        rc = OSHMEM_ERROR;
        goto bail;
    }
    oshmem_shmem_allgather(&my_ep_info, ep_info,
                           sizeof(spml_ikrit_mxm_ep_conn_info_t));

    opal_progress_register(spml_ikrit_progress);

    /* Connect to every peer using the addresses gathered above */
    for (n = 0; n < nprocs; ++n) {
        /* mxm 2.0 keeps its connections on a list. Make sure
         * that the list has a different order on every rank */
        i = (my_rank + n) % nprocs;
        mxm_peer_construct(&mca_spml_ikrit.mxm_peers[i]);

        err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, ep_info[i].addr.ep_addr,
                             &mca_spml_ikrit.mxm_peers[i].mxm_conn);
        if (MXM_OK != err) {
            SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
            goto bail;
        }
        mxm_conn_ctx_set(mca_spml_ikrit.mxm_peers[i].mxm_conn,
                         &mca_spml_ikrit.mxm_peers[i]);

        if (mca_spml_ikrit.hw_rdma_channel) {
            err = mxm_ep_connect(mca_spml_ikrit.mxm_hw_rdma_ep,
                                 ep_hw_rdma_info[i].addr.ep_addr,
                                 &mca_spml_ikrit.mxm_peers[i].mxm_hw_rdma_conn);
            if (MXM_OK != err) {
                SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
                goto bail;
            }
        } else {
            /* No dedicated RDMA endpoint: share the regular connection */
            mca_spml_ikrit.mxm_peers[i].mxm_hw_rdma_conn =
                mca_spml_ikrit.mxm_peers[i].mxm_conn;
        }
    }

    free(ep_info);
    free(ep_hw_rdma_info);

    if (mca_spml_ikrit.bulk_connect) {
        /* Need a barrier to ensure remote peers already created connection */
        oshmem_shmem_barrier();
        mxm_ep_wireup(mca_spml_ikrit.mxm_ep);
    }

    proc_self = oshmem_proc_group_find(oshmem_group_all, my_rank);

    /* identify local processes and change transport to SHM */
    for (i = 0; i < nprocs; i++) {
        if (procs[i]->super.proc_name.jobid != proc_self->super.proc_name.jobid ||
            !OPAL_PROC_ON_LOCAL_NODE(procs[i]->super.proc_flags)) {
            continue;
        }
        if (procs[i] == proc_self) {
            continue;
        }

        /* use zcopy for put/get via sysv shared memory with fallback to RDMA */
        mca_spml_ikrit.mxm_peers[i].ptl_id = MXM_PTL_SHM;
    }

    SPML_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

bail:
    free(ep_info);
    free(ep_hw_rdma_info);
    SPML_ERROR("add procs FAILED rc=%d", rc);

    return rc;
}
int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs) { spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL; spml_ikrit_mxm_ep_conn_info_t *ep_hw_rdma_info = NULL; spml_ikrit_mxm_ep_conn_info_t my_ep_info = {{0}}; #if MXM_API < MXM_VERSION(2,0) mxm_conn_req_t *conn_reqs; int timeout; #else size_t mxm_addr_len = MXM_MAX_ADDR_LEN; #endif mxm_error_t err; size_t i, n; int rc = OSHMEM_ERROR; ompi_proc_t *proc_self; int my_rank = oshmem_my_proc_id(); OBJ_CONSTRUCT(&mca_spml_ikrit.active_peers, opal_list_t); /* Allocate connection requests */ #if MXM_API < MXM_VERSION(2,0) conn_reqs = malloc(nprocs * sizeof(mxm_conn_req_t)); if (NULL == conn_reqs) { rc = OSHMEM_ERR_OUT_OF_RESOURCE; goto bail; } memset(conn_reqs, 0x0, sizeof(mxm_conn_req_t)); #endif ep_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs); if (NULL == ep_info) { rc = OSHMEM_ERR_OUT_OF_RESOURCE; goto bail; } if (mca_spml_ikrit.hw_rdma_channel) { ep_hw_rdma_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs); if (NULL == ep_hw_rdma_info) { rc = OSHMEM_ERR_OUT_OF_RESOURCE; goto bail; } } mca_spml_ikrit.mxm_peers = (mxm_peer_t **) malloc(nprocs * sizeof(*(mca_spml_ikrit.mxm_peers))); if (NULL == mca_spml_ikrit.mxm_peers) { rc = OSHMEM_ERR_OUT_OF_RESOURCE; goto bail; } #if MXM_API < MXM_VERSION(2,0) if (OSHMEM_SUCCESS != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_SELF)) { rc = OSHMEM_ERROR; goto bail; } if (OSHMEM_SUCCESS != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_RDMA)) { rc = OSHMEM_ERROR; goto bail; } #else if (mca_spml_ikrit.hw_rdma_channel) { err = mxm_ep_get_address(mca_spml_ikrit.mxm_hw_rdma_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len); if (MXM_OK != err) { orte_show_help("help-oshmem-spml-ikrit.txt", "unable to get endpoint address", true, mxm_error_string(err)); rc = OSHMEM_ERROR; goto bail; } oshmem_shmem_allgather(&my_ep_info, ep_hw_rdma_info, sizeof(spml_ikrit_mxm_ep_conn_info_t)); } err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep, &my_ep_info.addr.ep_addr, 
&mxm_addr_len); if (MXM_OK != err) { orte_show_help("help-oshmem-spml-ikrit.txt", "unable to get endpoint address", true, mxm_error_string(err)); rc = OSHMEM_ERROR; goto bail; } #endif oshmem_shmem_allgather(&my_ep_info, ep_info, sizeof(spml_ikrit_mxm_ep_conn_info_t)); opal_progress_register(spml_ikrit_progress); /* Get the EP connection requests for all the processes from modex */ for (n = 0; n < nprocs; ++n) { /* mxm 2.0 keeps its connections on a list. Make sure * that list have different order on every rank */ i = (my_rank + n) % nprocs; mca_spml_ikrit.mxm_peers[i] = OBJ_NEW(mxm_peer_t); if (NULL == mca_spml_ikrit.mxm_peers[i]) { rc = OSHMEM_ERR_OUT_OF_RESOURCE; goto bail; } mca_spml_ikrit.mxm_peers[i]->pe = i; #if MXM_API < MXM_VERSION(2,0) conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_SELF]; conn_reqs[i].ptl_addr[MXM_PTL_SHM] = NULL; conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_RDMA]; #else err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, ep_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i]->mxm_conn); if (MXM_OK != err) { SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); goto bail; } if (OSHMEM_SUCCESS != create_ptl_idx(i)) goto bail; mxm_conn_ctx_set(mca_spml_ikrit.mxm_peers[i]->mxm_conn, mca_spml_ikrit.mxm_peers[i]); if (mca_spml_ikrit.hw_rdma_channel) { err = mxm_ep_connect(mca_spml_ikrit.mxm_hw_rdma_ep, ep_hw_rdma_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn); if (MXM_OK != err) { SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); goto bail; } } else { mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn = mca_spml_ikrit.mxm_peers[i]->mxm_conn; } #endif } #if MXM_API < MXM_VERSION(2,0) /* Connect to remote peers */ if (mxm_get_version() < MXM_VERSION(1,5)) { timeout = 1000; } else { timeout = -1; } err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, conn_reqs, nprocs, timeout); if (MXM_OK != err) { SPML_ERROR("MXM 
returned connect error: %s\n", mxm_error_string(err)); for (i = 0; i < nprocs; ++i) { if (MXM_OK != conn_reqs[i].error) { SPML_ERROR("MXM EP connect to %s error: %s\n", procs[i]->proc_hostname, mxm_error_string(conn_reqs[i].error)); } } rc = OSHMEM_ERR_CONNECTION_FAILED; goto bail; } /* Save returned connections */ for (i = 0; i < nprocs; ++i) { mca_spml_ikrit.mxm_peers[i]->mxm_conn = conn_reqs[i].conn; if (OSHMEM_SUCCESS != create_ptl_idx(i)) { rc = OSHMEM_ERR_CONNECTION_FAILED; goto bail; } mxm_conn_ctx_set(conn_reqs[i].conn, mca_spml_ikrit.mxm_peers[i]); } if (conn_reqs) free(conn_reqs); #endif if (ep_info) free(ep_info); if (ep_hw_rdma_info) free(ep_hw_rdma_info); #if MXM_API >= MXM_VERSION(2,0) if (mca_spml_ikrit.bulk_connect) { /* Need a barrier to ensure remote peers already created connection */ oshmem_shmem_barrier(); mxm_ep_wireup(mca_spml_ikrit.mxm_ep); } #endif proc_self = oshmem_proc_group_find(oshmem_group_all, my_rank); /* identify local processes and change transport to SHM */ for (i = 0; i < nprocs; i++) { if (procs[i]->super.proc_name.jobid != proc_self->super.proc_name.jobid || !OPAL_PROC_ON_LOCAL_NODE(procs[i]->super.proc_flags)) { continue; } if (procs[i] == proc_self) continue; /* use zcopy for put/get via sysv shared memory */ OSHMEM_PROC_DATA(procs[i])->transport_ids[0] = MXM_PTL_SHM; OSHMEM_PROC_DATA(procs[i])->transport_ids[1] = MXM_PTL_RDMA; OSHMEM_PROC_DATA(procs[i])->num_transports = 2; } SPML_VERBOSE(50, "*** ADDED PROCS ***"); return OSHMEM_SUCCESS; bail: #if MXM_API < MXM_VERSION(2,0) if (conn_reqs) free(conn_reqs); #endif if (ep_info) free(ep_info); if (ep_hw_rdma_info) free(ep_hw_rdma_info); SPML_ERROR("add procs FAILED rc=%d", rc); return rc; }