static int send_ep_address(void) { mxm_error_t error; void *address; size_t addrlen; int rc; addrlen = 0; error = mxm_ep_get_address(ompi_pml_yalla.mxm_ep, NULL, &addrlen); PML_YALLA_ASSERT(error == MXM_ERR_BUFFER_TOO_SMALL); address = alloca(addrlen); error = mxm_ep_get_address(ompi_pml_yalla.mxm_ep, address, &addrlen); if (MXM_OK != error) { PML_YALLA_ERROR("Failed to get EP address"); return OMPI_ERROR; } OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, &mca_pml_yalla_component.pmlm_version, address, addrlen); if (OMPI_SUCCESS != rc) { PML_YALLA_ERROR("Open MPI couldn't distribute EP connection details"); return OMPI_ERROR; } return OMPI_SUCCESS; }
/**************************
 * usNIC BTL-specific functions to hide differences between master and
 * v1.8
 **************************/

/*
 * Publish an array of usNIC modex entries via the modex.
 *
 * The OPAL_MODEX_SEND status is returned through *rc; `size` is the
 * byte length of the `modexes` buffer.
 */
void usnic_compat_modex_send(int *rc,
                             mca_base_component_t *component,
                             opal_btl_usnic_modex_t *modexes,
                             size_t size)
{
    OPAL_MODEX_SEND(*rc, OPAL_PMIX_REMOTE, component, modexes, size);
}
/*
 * Publish this process's SCIF port id via the modex so that peers on
 * the same node can connect.  Returns the OPAL_MODEX_SEND status.
 */
static int mca_btl_scif_modex_send (void)
{
    mca_btl_scif_modex_t port_info;
    int ret;

    /* memset (rather than a struct initializer) so every byte of the
     * struct sent over the wire — including any padding — is zeroed */
    memset (&port_info, 0, sizeof (mca_btl_scif_modex_t));
    port_info.port_id = mca_btl_scif_module.port_id;

    OPAL_MODEX_SEND(ret, PMIX_SYNC_REQD, PMIX_LOCAL,
                    &mca_btl_scif_component.super.btl_version,
                    &port_info, sizeof (port_info));

    return ret;
}
/* * Send local device information and other information * required for setup */ static int opal_common_ugni_send_modex (int my_cdm_id) { uint32_t modex_size, total_msg_size, msg_offset; struct opal_common_ugni_modex_t modex; char *modex_msg; int rc, i; modex_size = sizeof (struct opal_common_ugni_modex_t); total_msg_size = opal_common_ugni_module.device_count * modex_size; modex_msg = (char *) malloc (total_msg_size); if (NULL == modex_msg) { OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d", __FILE__, __LINE__)); return OPAL_ERR_OUT_OF_RESOURCE; } /* pack modex for all available devices */ for (i = 0, msg_offset = 0; i < opal_common_ugni_module.device_count ; ++i) { opal_common_ugni_device_t *dev = opal_common_ugni_module.devices + i; modex.addr = dev->dev_addr; modex.id = my_cdm_id; memcpy ((void *)((uintptr_t) modex_msg + msg_offset), (void *)&modex, modex_size); msg_offset += modex_size; } /* * need global for edge cases like MPI_Comm_spawn support with * new ranks started on the same nodes as the spawnee ranks, etc. */ OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &opal_common_ugni_component, modex_msg, total_msg_size); free(modex_msg); return rc; }
/* * Send local device information and other information * required for setup */ static int opal_common_ugni_send_modex (int my_cdm_id) { uint32_t modex_size, total_msg_size, msg_offset; struct opal_common_ugni_modex_t modex; char *modex_msg; int rc, i; modex_size = sizeof (struct opal_common_ugni_modex_t); total_msg_size = opal_common_ugni_module.device_count * modex_size; modex_msg = (char *) malloc (total_msg_size); if (NULL == modex_msg) { OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d", __FILE__, __LINE__)); return OPAL_ERR_OUT_OF_RESOURCE; } /* pack modex for all available devices */ for (i = 0, msg_offset = 0; i < opal_common_ugni_module.device_count ; ++i) { opal_common_ugni_device_t *dev = opal_common_ugni_module.devices + i; modex.addr = dev->dev_addr; modex.id = my_cdm_id; memcpy ((void *)((uintptr_t) modex_msg + msg_offset), (void *)&modex, modex_size); msg_offset += modex_size; } OPAL_MODEX_SEND(rc, PMIX_ASYNC_RDY, PMIX_REMOTE, &opal_common_ugni_component, modex_msg, total_msg_size); free(modex_msg); return rc; }
static int mca_pml_ucx_send_worker_address(void) { ucp_address_t *address; ucs_status_t status; size_t addrlen; int rc; status = ucp_worker_get_address(ompi_pml_ucx.ucp_worker, &address, &addrlen); if (UCS_OK != status) { PML_UCX_ERROR("Failed to get worker address"); return OMPI_ERROR; } OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen); if (OMPI_SUCCESS != rc) { PML_UCX_ERROR("Open MPI couldn't distribute EP connection details"); return OMPI_ERROR; } ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address); return OMPI_SUCCESS; }
/*
 * Initialize the PSM2 MTL module for this process: derive the job-wide
 * UUID from the environment, open a PSM2 endpoint, create its matched
 * queue, publish the endpoint id via the modex, and register the
 * progress function.
 *
 * @param local_rank       rank of this process among processes on this node
 * @param num_local_procs  number of processes on this node
 * @return OMPI_SUCCESS on success, OMPI_ERROR on any failure
 */
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
    psm2_error_t err;
    psm2_ep_t ep; /* endpoint handle */
    psm2_mq_t mq;
    psm2_epid_t epid; /* unique lid+port identifier */
    psm2_uuid_t unique_job_key;
    struct psm2_ep_open_opts ep_opt;
    unsigned long long *uu = (unsigned long long *) unique_job_key;
    char *generated_key;
    char env_string[256];
    int rc;

    /* The job-wide UUID is distributed by the runtime through this
     * environment variable as two 16-hex-digit halves ("xxxx-xxxx",
     * 33 chars total); every rank must derive the same key or the
     * endpoints cannot connect. */
    generated_key = getenv("OMPI_MCA_orte_precondition_transports");
    memset(uu, 0, sizeof(psm2_uuid_t));

    if (!generated_key || (strlen(generated_key) != 33) ||
        sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) {
        opal_show_help("help-mtl-psm2.txt", "no uuid present", true,
                       generated_key ? "could not be parsed from" :
                       "not present in", ompi_process_info.nodename);
        return OMPI_ERROR;
    }

    /* Handle our own errors for opening endpoints */
    psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);

    /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware
     * contexts correctly.  (setenv with overwrite=0: a value already set in
     * the environment takes precedence.) */
    snprintf(env_string, sizeof(env_string), "%d", local_rank);
    setenv("MPI_LOCALRANKID", env_string, 0);
    snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
    setenv("MPI_LOCALNRANKS", env_string, 0);

    /* Setup the endpoint options: start from PSM2 defaults, then apply
     * the user-configured connect timeout (seconds -> nanoseconds). */
    psm2_ep_open_opts_get_defaults(&ep_opt);
    ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
    ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */

    /* Open PSM2 endpoint */
    err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid);
    if (err) {
        opal_show_help("help-mtl-psm2.txt",
                       "unable to open endpoint", true,
                       psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    /* Future errors are handled by the default error handler */
    psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);

    /* Create the matched queue; the mask selects which tag bits PSM2
     * matches on.  NOTE(review): on failure the endpoint opened above is
     * not closed before returning — confirm whether cleanup is handled
     * by the caller/finalize path. */
    err = psm2_mq_init(ep, 0xffff000000000000ULL, NULL, 0, &mq);
    if (err) {
        opal_show_help("help-mtl-psm2.txt",
                       "psm2 init", true,
                       psm2_error_get_string(err));
        return OMPI_ERROR;
    }

    /* Stash the handles in the module singleton for later use. */
    ompi_mtl_psm2.ep   = ep;
    ompi_mtl_psm2.epid = epid;
    ompi_mtl_psm2.mq   = mq;

    /* Publish our endpoint id so peers can connect to us. */
    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_mtl_psm2_component.super.mtl_version,
                    &ompi_mtl_psm2.epid, sizeof(psm2_epid_t));

    if (OMPI_SUCCESS != rc) {
        opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
        return OMPI_ERROR;
    }

    /* register the psm2 progress function */
    opal_progress_register(ompi_mtl_psm2_progress);

    return OMPI_SUCCESS;
}