/*
 * Fetch the ugni modex blob published by the peer of this endpoint and
 * record the remote device address and id on the endpoint.
 *
 * @param ep  endpoint to fill in; ep and ep->peer_proc must be non-NULL
 *
 * @return OPAL_SUCCESS, or the error code from the modex receive
 */
static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_modex_t *remote_modex;
    size_t modex_size;
    int rc;

    assert (NULL != ep && NULL != ep->peer_proc);

    /* Receive the modex; on success the receive allocates remote_modex
     * and ownership passes to us */
    OPAL_MODEX_RECV(rc, &mca_btl_ugni_component.super.btl_version,
                    &ep->peer_proc->proc_name, (void **) &remote_modex, &modex_size);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_ERROR(("error receiving modex"));
        return rc;
    }

    ep->ep_rem_addr = remote_modex->addr;
    ep->ep_rem_id   = remote_modex->id;

    BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d",
                 (void*)ep, ep->ep_rem_addr, ep->ep_rem_id));

    free (remote_modex);

    return OPAL_SUCCESS;
}
/*
 * Receive the peer's array of ptl_process_t ids from the modex, create the
 * endpoint if create_endpoint() has not already done so, and fill in the
 * physical peer id for the requested interface.
 *
 * @param interface  index into the peer's NI array
 * @param proc       peer process
 * @param phys_peer  output: physical pid/nid of the peer on that interface
 * @param endpoint   in/out: endpoint pointer; allocated here if *endpoint is NULL
 *
 * @return OPAL_SUCCESS, OPAL_ERR_NOT_FOUND if the peer does not run the
 *         portals4 BTL, OPAL_ERR_OUT_OF_RESOURCE on allocation failure,
 *         or OPAL_ERROR on a malformed modex message.
 */
static int create_peer_and_endpoint(int interface,
                                    opal_proc_t *proc,
                                    ptl_process_t *phys_peer,
                                    mca_btl_base_endpoint_t **endpoint)
{
    int ret;
    size_t size;
    ptl_process_t *id;

    OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version,
                    &proc->proc_name, (void**) &id, &size);
    if (OPAL_ERR_NOT_FOUND == ret) {
        OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output,
                             "btl/portals4: Portals 4 BTL not available on peer: %s",
                             opal_strerror(ret)));
        return ret;
    } else if (OPAL_SUCCESS != ret) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: opal_modex_recv failed: %s",
                            opal_strerror(ret));
        return ret;
    }
    if (size < sizeof(ptl_process_t)) { /* no available connection */
        free(id);  /* fix: id is owned by us once the receive succeeds */
        return OPAL_ERROR;
    }
    if ((size % sizeof(ptl_process_t)) != 0) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: invalid format in modex");
        free(id);  /* fix: was leaked */
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "btl/portals4: %d NI(s) declared in the modex",
                         (int) (size/sizeof(ptl_process_t))));

    /*
     * check if create_endpoint() already created the endpoint.
     * if not, create it here.
     */
    if (NULL == *endpoint) {
        *endpoint = malloc(sizeof(mca_btl_base_endpoint_t));
        if (NULL == *endpoint) {
            free(id);  /* fix: was leaked */
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    /*
     * regardless of who created the endpoint, set the rank here
     * because we are using logical mapping.
     */
    (*endpoint)->ptl_proc.rank = proc->proc_name.vpid;

    phys_peer->phys.pid = id[interface].phys.pid;
    phys_peer->phys.nid = id[interface].phys.nid;
    opal_output_verbose(50, opal_btl_base_framework.framework_output,
                        "logical: global rank=%d pid=%d nid=%d\n",
                        proc->proc_name.vpid, phys_peer->phys.pid, phys_peer->phys.nid);

    /* fix: the id array allocated by OPAL_MODEX_RECV was never freed */
    free(id);

    return OPAL_SUCCESS;
}
/*
 * Compatibility shim around OPAL_MODEX_RECV: receive the usnic modex blob
 * published by the given peer.  The received buffer and its length are
 * returned through modexes/size; *rc carries the OPAL status code.
 */
void usnic_compat_modex_recv(int *rc,
                             mca_base_component_t *component,
                             opal_proc_t *proc,
                             opal_btl_usnic_modex_t **modexes,
                             size_t *size)
{
    int status;

    OPAL_MODEX_RECV(status, component, &proc->proc_name,
                    (uint8_t**) modexes, size);
    *rc = status;
}
/*
 * Receive the endpoint address the peer published via the modex.
 * On success *address_p / *addrlen_p describe the received buffer.
 *
 * @return the modex receive status (negative on failure)
 */
static int recv_ep_address(ompi_proc_t *proc, void **address_p, size_t *addrlen_p)
{
    int status;

    OPAL_MODEX_RECV(status, &mca_pml_yalla_component.pmlm_version,
                    &proc->super, address_p, addrlen_p);
    if (0 > status) {
        PML_YALLA_ERROR("Failed to receive EP address");
    }

    return status;
}
static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_proc_t *proc, int remote_rank) { mca_btl_vader_component_t *component = &mca_btl_vader_component; union vader_modex_t *modex; size_t msg_size; int rc; OBJ_CONSTRUCT(ep, mca_btl_vader_endpoint_t); ep->peer_smp_rank = remote_rank; if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) { OPAL_MODEX_RECV(rc, &component->super.btl_version, &proc->proc_name, (void **) &modex, &msg_size); if (OPAL_SUCCESS != rc) { return rc; } /* attatch to the remote segment */ #if OPAL_BTL_VADER_HAVE_XPMEM if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { /* always use xpmem if it is available */ ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666); ep->segment_data.xpmem.rcache = mca_rcache_base_module_create("vma"); (void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size, MCA_MPOOL_FLAGS_PERSIST, (void **) &ep->segment_base); } else { #endif /* store a copy of the segment information for detach */ ep->segment_data.other.seg_ds = malloc (msg_size); if (NULL == ep->segment_data.other.seg_ds) { return OPAL_ERR_OUT_OF_RESOURCE; } memcpy (ep->segment_data.other.seg_ds, &modex->seg_ds, msg_size); ep->segment_base = opal_shmem_segment_attach (ep->segment_data.other.seg_ds); if (NULL == ep->segment_base) { return OPAL_ERROR; } #if OPAL_BTL_VADER_HAVE_XPMEM } #endif OBJ_CONSTRUCT(&ep->lock, opal_mutex_t); free (modex); } else { /* set up the segment base so we can calculate a virtual to real for local pointers */ ep->segment_base = component->my_segment; } ep->fifo = (struct vader_fifo_t *) ep->segment_base; return OPAL_SUCCESS; }
/*
 * Receive the UCX worker address published by the given peer via the modex.
 * On success *address_p points at the received address blob and *addrlen_p
 * holds its length; *address_p is pre-set to NULL so callers see NULL on
 * failure.
 *
 * @return the modex receive status (negative on failure)
 */
static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc,
                                           ucp_address_t **address_p,
                                           size_t *addrlen_p)
{
    int rc;

    *address_p = NULL;

    OPAL_MODEX_RECV(rc, &mca_pml_ucx_component.pmlm_version,
                    &proc->super.proc_name, (void**)address_p, addrlen_p);
    if (0 > rc) {
        PML_UCX_ERROR("Failed to receive EP address");
    }

    return rc;
}
/*
 * Initialize an endpoint for the given peer (fast-box variant).  For a
 * remote peer the peer's modex message is received and its shared-memory
 * segment is attached (xpmem when compiled in, opal_shmem otherwise), and
 * the per-peer fast-box in/out pointers are computed from the local rank
 * and the peer rank.
 *
 * NOTE: this definition is truncated in this view at the trailing
 * "} else {"; the local-rank branch is not visible here.
 */
static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_proc_t *proc, int remote_rank)
{
    /* Fast-box slot offsets: each peer skips its own slot, hence the -1
     * adjustment when the other rank sorts after/before us. */
    const int fbox_in_offset = MCA_BTL_VADER_LOCAL_RANK - (MCA_BTL_VADER_LOCAL_RANK > remote_rank);
    const int fbox_out_offset = remote_rank - (MCA_BTL_VADER_LOCAL_RANK < remote_rank);
    mca_btl_vader_component_t *component = &mca_btl_vader_component;
    struct vader_modex_t *modex;
    size_t msg_size;
    int rc;

    ep->peer_smp_rank = remote_rank;

    if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) {
        /* the receive allocates modex; we own it from here on */
        OPAL_MODEX_RECV(rc, &component->super.btl_version, proc, (uint8_t**)&modex, &msg_size);
        if (OPAL_SUCCESS != rc) {
            return rc;
        }

        /* attach to the remote segment */
#if OPAL_BTL_VADER_HAVE_XPMEM
        /* always use xpmem if it is available */
        ep->apid = xpmem_get (modex->seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
        ep->rcache = mca_rcache_base_module_create("vma");
        (void) vader_get_registation (ep, modex->segment_base, mca_btl_vader_component.segment_size,
                                      MCA_MPOOL_FLAGS_PERSIST, (void **) &ep->segment_base);
#else
        /* only the trailing seg_ds portion of the modex is the shmem
         * descriptor; strip the leading header bytes */
        msg_size -= offsetof (struct vader_modex_t, seg_ds);
        memcpy (&ep->seg_ds, &modex->seg_ds, msg_size);

        ep->segment_base = opal_shmem_segment_attach (&ep->seg_ds);
        if (NULL == ep->segment_base) {
            /* NOTE(review): rc is OPAL_SUCCESS here, so this failure path
             * returns success, and modex is leaked — should likely return
             * OPAL_ERROR and free(modex); confirm against upstream. */
            return rc;
        }
#endif
        free (modex);

        ep->next_fbox_out = 0;
        ep->next_fbox_in = 0;
        ep->next_sequence = 0;
        ep->expected_sequence = 0;

        /* inbound fast-boxes live in the PEER's segment, outbound in ours */
        ep->fbox_in = (struct mca_btl_vader_fbox_t * restrict)
            (ep->segment_base + MCA_BTL_VADER_FIFO_SIZE + fbox_in_offset * MCA_BTL_VADER_FBOX_PEER_SIZE);
        ep->fbox_out = (struct mca_btl_vader_fbox_t * restrict)
            (component->my_segment + MCA_BTL_VADER_FIFO_SIZE + fbox_out_offset * MCA_BTL_VADER_FBOX_PEER_SIZE);
    } else {
/*
 * Receive the peer's array of ptl_process_t ids from the modex, allocate an
 * endpoint, and record the peer's id for the requested interface.
 *
 * @param interface  index into the peer's NI array
 * @param proc       peer process
 * @param endpoint   output: newly allocated endpoint (caller owns)
 *
 * @return OPAL_SUCCESS, OPAL_ERR_NOT_FOUND if the peer does not run the
 *         portals4 BTL, OPAL_ERR_OUT_OF_RESOURCE on allocation failure,
 *         or OPAL_ERROR on a malformed modex message.
 */
static int create_endpoint(int interface,
                           opal_proc_t *proc,
                           mca_btl_base_endpoint_t **endpoint)
{
    int ret;
    size_t size;
    ptl_process_t *id;

    OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version,
                    &proc->proc_name, (void**) &id, &size);
    if (OPAL_ERR_NOT_FOUND == ret) {
        OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output,
                             "btl/portals4: Portals 4 BTL not available on peer: %s",
                             opal_strerror(ret)));
        return ret;
    } else if (OPAL_SUCCESS != ret) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: opal_modex_recv failed: %s",
                            opal_strerror(ret));
        return ret;
    }
    if (size < sizeof(ptl_process_t)) { /* no available connection */
        free(id);  /* fix: id is owned by us once the receive succeeds */
        return OPAL_ERROR;
    }
    if ((size % sizeof(ptl_process_t)) != 0) {
        opal_output_verbose(0, opal_btl_base_framework.framework_output,
                            "btl/portals4: invalid format in modex");
        free(id);  /* fix: was leaked */
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "btl/portals4: %d NI(s) declared in the modex",
                         (int) (size/sizeof(ptl_process_t))));

    *endpoint = malloc(sizeof(mca_btl_base_endpoint_t));
    if (NULL == *endpoint) {
        free(id);  /* fix: was leaked */
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* copy the id for this interface by value, then release the array */
    (*endpoint)->ptl_proc = id[interface];

    /* fix: the id array allocated by OPAL_MODEX_RECV was never freed */
    free(id);

    return OPAL_SUCCESS;
}
/*
 * Create a common ugni endpoint for the given peer process on the given
 * device.  The peer's modex message supplies the remote address, id, and
 * irq memory handle.
 *
 * @param dev        local ugni device
 * @param peer_proc  peer process
 * @param ep         output: new endpoint (caller owns the reference)
 *
 * @return OPAL_SUCCESS or an OPAL error code
 */
int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
                                        opal_common_ugni_endpoint_t **ep)
{
    opal_common_ugni_endpoint_t *endpoint;
    opal_common_ugni_modex_t *modex;
    size_t msg_size;
    int rc;

    assert (NULL != dev && NULL != ep && peer_proc);

    endpoint = OBJ_NEW(opal_common_ugni_endpoint_t);
    if (OPAL_UNLIKELY(NULL == endpoint)) {
        assert (0);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* Receive the modex */
    OPAL_MODEX_RECV(rc, &opal_common_ugni_component, peer_proc, &modex, &msg_size);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
        /* fix: the freshly created endpoint was leaked on this path */
        OBJ_RELEASE(endpoint);
        return rc;
    }

    endpoint->ep_rem_addr = modex->addr;
    endpoint->ep_rem_id = modex->id;
    endpoint->ep_rem_irq_memhndl = modex->irq_memhndl;
    endpoint->dev = dev;

    *ep = endpoint;

    free (modex);

    return OPAL_SUCCESS;
}
int ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t** procs) { int i,j; int rc; psm2_epid_t *epids_in = NULL; int *mask_in = NULL; psm2_epid_t *epid; psm2_epaddr_t *epaddrs_out = NULL; psm2_error_t *errs_out = NULL, err; size_t size; int proc_errors[PSM2_ERROR_LAST] = { 0 }; int timeout_in_secs; assert(mtl == &ompi_mtl_psm2.super); rc = OMPI_ERR_OUT_OF_RESOURCE; errs_out = (psm2_error_t *) malloc(nprocs * sizeof(psm2_error_t)); if (errs_out == NULL) { goto bail; } epids_in = (psm2_epid_t *) malloc(nprocs * sizeof(psm2_epid_t)); if (epids_in == NULL) { goto bail; } mask_in = (int *) malloc(nprocs * sizeof(int)); if (mask_in == NULL) { goto bail; } epaddrs_out = (psm2_epaddr_t *) malloc(nprocs * sizeof(psm2_epaddr_t)); if (epaddrs_out == NULL) { goto bail; } rc = OMPI_SUCCESS; /* Get the epids for all the processes from modex */ for (i = 0; i < (int) nprocs; i++) { if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) { /* Already connected: don't connect again */ mask_in[i] = 0; continue; } OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version, &procs[i]->super.proc_name, (void**)&epid, &size); if (rc != OMPI_SUCCESS || size != sizeof(psm2_epid_t)) { return OMPI_ERROR; } epids_in[i] = *epid; mask_in[i] = 1; } timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs); psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_NOP); err = psm2_ep_connect(ompi_mtl_psm2.ep, nprocs, epids_in, mask_in, errs_out, epaddrs_out, timeout_in_secs * 1e9); if (err) { char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err); if (errstr == NULL) { opal_output(0, "PSM2 returned unhandled/unknown connect error: %s\n", psm2_error_get_string(err)); } for (i = 0; i < (int) nprocs; i++) { if (0 == mask_in[i]) { continue; } psm2_error_t thiserr = errs_out[i]; errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr); if (proc_errors[thiserr] == 0) { proc_errors[thiserr] = 1; opal_output(0, "PSM2 EP connect 
error (%s):", errstr ? errstr : "unknown connect error"); for (j = 0; j < (int) nprocs; j++) { if (errs_out[j] == thiserr) { opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ? "unknown" : procs[j]->super.proc_hostname); } } opal_output(0, "\n"); } } rc = OMPI_ERROR; } else { /* Default error handling is enabled, errors will not be returned to * user. PSM2 prints the error and the offending endpoint's hostname * and exits with -1 */ psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT); /* Fill in endpoint data */ for (i = 0; i < (int) nprocs; i++) { if (0 == mask_in[i]) { continue; } mca_mtl_psm2_endpoint_t *endpoint = (mca_mtl_psm2_endpoint_t *) OBJ_NEW(mca_mtl_psm2_endpoint_t); endpoint->peer_epid = epids_in[i]; endpoint->peer_addr = epaddrs_out[i]; procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint; } rc = OMPI_SUCCESS; } bail: if (epids_in != NULL) { free(epids_in); } if (mask_in != NULL) { free(mask_in); } if (errs_out != NULL) { free(errs_out); } if (epaddrs_out != NULL) { free(epaddrs_out); } return rc; }
int ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t** procs) { int ret, me; size_t i; bool new_found = false; /* Get the list of ptl_process_id_t from the runtime and copy into structure */ for (i = 0 ; i < nprocs ; ++i) { ptl_process_t *modex_id; size_t size; if( procs[i] == ompi_proc_local_proc ) { me = i; } if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Portals 4 MTL does not support heterogeneous operations."); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Proc %s architecture %x, mine %x.", OMPI_NAME_PRINT(&procs[i]->super.proc_name), procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch); return OMPI_ERR_NOT_SUPPORTED; } OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, &procs[i]->super, (char**)&modex_id, &size); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: ompi_modex_recv failed: %d\n", __FILE__, __LINE__, ret); return ret; } else if (sizeof(ptl_process_t) != size) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: ompi_modex_recv failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERR_BAD_PARAM; } if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { ptl_process_t *peer_id; peer_id = malloc(sizeof(ptl_process_t)); if (NULL == peer_id) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: malloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERR_OUT_OF_RESOURCE; } *peer_id = *modex_id; procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id; new_found = true; } else { ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; if (proc->phys.nid != modex_id->phys.nid || proc->phys.pid != modex_id->phys.pid) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: existing peer and modex peer 
don't match\n", __FILE__, __LINE__); return OMPI_ERROR; } } } #if OMPI_MTL_PORTALS4_FLOW_CONTROL if (new_found) { ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: flowctl_add_procs failed: %d\n", __FILE__, __LINE__, ret); return ret; } } #endif return OMPI_SUCCESS; }
/*
 * Look up (or create) the mca_btl_openib_proc_t for the given opal proc and
 * return it with its proc_lock held.  On first sight of a peer the peer's
 * modex message is received and unpacked: a port count, then per-port a
 * fixed-size message struct followed by a list of CPC descriptors, each
 * optionally carrying an opaque modex blob.
 *
 * Thread safety: several threads may race to build the struct; the loser of
 * the race releases its copy after re-checking under ib_lock.
 *
 * Returns the locked proc struct, or NULL on error / empty modex.
 *
 * NOTE(review): the `message` buffer allocated by OPAL_MODEX_RECV does not
 * appear to be freed on any path in this function — possible leak; confirm
 * against upstream.
 */
mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc)
{
    mca_btl_openib_proc_t *ib_proc = NULL, *ib_proc_ret = NULL;
    size_t msg_size;
    uint32_t size;
    int rc, i, j;
    void *message;
    char *offset;
    int modex_message_size;
    mca_btl_openib_modex_message_t dummy;
    bool is_new = false;

    /* Check if we have already created a IB proc
     * structure for this ompi process */
    ib_proc = ibproc_lookup_and_lock(proc);
    if (NULL != ib_proc) {
        /* Gotcha! */
        return ib_proc;
    }

    /* All initialization has to be an atomic operation. we do the following assumption:
     * - we let all concurent threads to try to do the initialization;
     * - when one has finished it locks ib_lock and checks if corresponding
     *   process is still missing;
     * - if so - new proc is added, otherwise - initialized proc struct is released.
     */

    /* First time, gotta create a new IB proc
     * out of the opal_proc ... */
    ib_proc = OBJ_NEW(mca_btl_openib_proc_t);
    if (NULL == ib_proc) {
        return NULL;
    }
    /* Initialize number of peer */
    ib_proc->proc_endpoint_count = 0;
    ib_proc->proc_opal = proc;

    /* query for the peer address info */
    OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
                    &proc->proc_name, &message, &msg_size);
    if (OPAL_SUCCESS != rc) {
        BTL_VERBOSE(("[%s:%d] opal_modex_recv failed for peer %s",
                     __FILE__, __LINE__,
                     OPAL_NAME_PRINT(proc->proc_name)));
        goto no_err_exit;
    }
    if (0 == msg_size) {
        goto no_err_exit;
    }

    /* Message was packed in btl_openib_component.c; the format
       is listed in a comment in that file */
    /* size of the fixed (endian-convertible) prefix of each port entry */
    modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);

    /* Unpack the number of modules in the message */
    offset = (char *) message;
    unpack8(&offset, &(ib_proc->proc_port_count));
    BTL_VERBOSE(("unpack: %d btls", ib_proc->proc_port_count));
    if (ib_proc->proc_port_count > 0) {
        ib_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
            malloc(sizeof(mca_btl_openib_proc_modex_t) *
                   ib_proc->proc_port_count);
        /* NOTE(review): malloc result is not checked here; a NULL return
         * would fault in the loop below — confirm against upstream. */
    } else {
        ib_proc->proc_ports = NULL;
    }

    /* Loop over unpacking all the ports */
    for (i = 0; i < ib_proc->proc_port_count; i++) {
        /* Unpack the modex comment message struct */
        size = modex_message_size;
        memcpy(&(ib_proc->proc_ports[i].pm_port_info), offset, size);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
        MCA_BTL_OPENIB_MODEX_MSG_NTOH(ib_proc->proc_ports[i].pm_port_info);
#endif
        offset += size;
        BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d",
                     i, (int)(offset-((char*)message))));

        /* Unpack the number of CPCs that follow */
        unpack8(&offset, &(ib_proc->proc_ports[i].pm_cpc_data_count));
        BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
                     i, ib_proc->proc_ports[i].pm_cpc_data_count,
                     (int)(offset-((char*)message))));
        ib_proc->proc_ports[i].pm_cpc_data =
            (opal_btl_openib_connect_base_module_data_t *)
            calloc(ib_proc->proc_ports[i].pm_cpc_data_count,
                   sizeof(opal_btl_openib_connect_base_module_data_t));
        if (NULL == ib_proc->proc_ports[i].pm_cpc_data) {
            goto err_exit;
        }

        /* Unpack the CPCs */
        for (j = 0; j < ib_proc->proc_ports[i].pm_cpc_data_count; ++j) {
            uint8_t u8;
            opal_btl_openib_connect_base_module_data_t *cpcd;
            cpcd = ib_proc->proc_ports[i].pm_cpc_data + j;
            /* CPC component is transmitted as a small index */
            unpack8(&offset, &u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
                         i, j, u8, (int)(offset-(char*)message)));
            cpcd->cbm_component =
                opal_btl_openib_connect_base_get_cpc_byindex(u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
                         i, j, cpcd->cbm_component->cbc_name));

            unpack8(&offset, &cpcd->cbm_priority);
            unpack8(&offset, &cpcd->cbm_modex_message_len);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
                         i, j, cpcd->cbm_priority, cpcd->cbm_modex_message_len,
                         (int)(offset-(char*)message)));

            /* optional opaque per-CPC blob follows the fixed fields */
            if (cpcd->cbm_modex_message_len > 0) {
                cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
                if (NULL == cpcd->cbm_modex_message) {
                    BTL_ERROR(("Failed to malloc"));
                    goto err_exit;
                }
                memcpy(cpcd->cbm_modex_message, offset,
                       cpcd->cbm_modex_message_len);
                offset += cpcd->cbm_modex_message_len;
                BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
                             i, j,
                             ((uint32_t*)cpcd->cbm_modex_message)[0],
                             ((uint32_t*)cpcd->cbm_modex_message)[1],
                             (int)(offset-((char*)message))));
            }
        }
    }

    if (0 == ib_proc->proc_port_count) {
        ib_proc->proc_endpoints = NULL;
    } else {
        ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
            malloc(ib_proc->proc_port_count *
                   sizeof(mca_btl_base_endpoint_t*));
    }
    /* NOTE(review): when proc_port_count is 0, proc_endpoints is NULL and
     * this check routes a zero-port peer to err_exit — presumably
     * intentional (no usable ports), but confirm. */
    if (NULL == ib_proc->proc_endpoints) {
        goto err_exit;
    }
    BTL_VERBOSE(("unpacking done!"));

    /* Finally add this process to the initialized procs list */
    opal_mutex_lock(&mca_btl_openib_component.ib_lock);

    ib_proc_ret = ibproc_lookup_no_lock(proc);
    if (NULL == ib_proc_ret) {
        /* if process can't be found in this list - insert it locked
         * it is safe to lock ib_proc here because this thread is
         * the only one who knows about it so far */
        opal_mutex_lock(&ib_proc->proc_lock);
        opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super);
        ib_proc_ret = ib_proc;
        is_new = true;
    } else {
        /* otherwise - release module_proc */
        OBJ_RELEASE(ib_proc);
    }
    opal_mutex_unlock(&mca_btl_openib_component.ib_lock);

    /* if we haven't insert the process - lock it here so we
     * won't lock mca_btl_openib_component.ib_lock */
    if( !is_new ){
        opal_mutex_lock(&ib_proc_ret->proc_lock);
    }

    return ib_proc_ret;

err_exit:
    BTL_ERROR(("%d: error exit from mca_btl_openib_proc_create",
               OPAL_PROC_MY_NAME.vpid));
no_err_exit:
    OBJ_RELEASE(ib_proc);
    return NULL;
}
/*
 * Create an opal_btl_usnic_proc_t and initialize it with modex info
 * and an empty array of endpoints.
 *
 * @param opal_proc   peer process to build the proc struct for
 * @param usnic_proc  output: new proc on OPAL_SUCCESS, NULL otherwise
 *
 * Returns OPAL_ERR_UNREACH if we can't reach the peer (i.e., we can't
 * find their modex data), or another OPAL error code on failure.
 * On any failure the partially built proc is released via OBJ_RELEASE
 * (its destructor is expected to clean up proc_modex etc. — confirm).
 */
static int create_proc(opal_proc_t *opal_proc,
                       opal_btl_usnic_proc_t **usnic_proc)
{
    opal_btl_usnic_proc_t *proc = NULL;
    size_t size;
    int rc;

    *usnic_proc = NULL;

    /* Create the proc if it doesn't already exist */
    proc = OBJ_NEW(opal_btl_usnic_proc_t);
    if (NULL == proc) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* Initialize number of peers */
    proc->proc_endpoint_count = 0;
    proc->proc_opal = opal_proc;

    /* query for the peer address info; the receive allocates proc_modex */
    OPAL_MODEX_RECV(rc, &mca_btl_usnic_component.super.btl_version,
                    opal_proc, (uint8_t**)&proc->proc_modex, &size);

    /* If this proc simply doesn't have this key, then they're not
       running the usnic BTL -- just ignore them.  Otherwise, show an
       error message. */
    if (OPAL_ERR_DATA_VALUE_NOT_FOUND == rc) {
        OBJ_RELEASE(proc);
        return OPAL_ERR_UNREACH;
    } else if (OPAL_SUCCESS != rc) {
        opal_show_help("help-mpi-btl-usnic.txt",
                       "internal error during init",
                       true,
                       opal_process_info.nodename,
                       "<none>", "<none>",
                       "opal_modex_recv() failed", __FILE__, __LINE__,
                       opal_strerror(rc));
        OBJ_RELEASE(proc);
        return OPAL_ERROR;
    }

    /* the modex must be a whole number of per-endpoint address entries */
    if ((size % sizeof(opal_btl_usnic_addr_t)) != 0) {
        char msg[1024];
        snprintf(msg, sizeof(msg),
                 "sizeof(modex for peer %s data) == %d, expected multiple of %d",
                 OPAL_NAME_PRINT(opal_proc->proc_name),
                 (int) size, (int) sizeof(opal_btl_usnic_addr_t));
        opal_show_help("help-mpi-btl-usnic.txt", "internal error during init",
                       true,
                       opal_process_info.nodename,
                       "<none>", 0, "invalid modex data", __FILE__, __LINE__,
                       msg);
        OBJ_RELEASE(proc);
        return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
    }

    proc->proc_modex_count = size / sizeof(opal_btl_usnic_addr_t);
    if (0 == proc->proc_modex_count) {
        /* peer published no usable addresses: treat as unreachable */
        proc->proc_endpoints = NULL;
        OBJ_RELEASE(proc);
        return OPAL_ERR_UNREACH;
    }

    /* Sanity check: ensure that the remote proc agrees with this proc
       on whether we're doing UDP or not.  Note that all endpoints on the
       remote proc will have the same "use_udp" value, so we only need to
       check one of them. */
    if (proc->proc_modex[0].use_udp != mca_btl_usnic_component.use_udp) {
        opal_show_help("help-mpi-btl-usnic.txt", "transport mismatch",
                       true,
                       opal_process_info.nodename,
                       proc->proc_opal->proc_hostname);
        OBJ_RELEASE(proc);
        return OPAL_ERR_BAD_PARAM;
    }

    /* tracks which remote modex entries have been claimed by a local module */
    proc->proc_modex_claimed = (bool*)
        calloc(proc->proc_modex_count, sizeof(bool));
    if (NULL == proc->proc_modex_claimed) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(proc);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* one endpoint slot per remote modex entry */
    proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        calloc(proc->proc_modex_count, sizeof(mca_btl_base_endpoint_t*));
    if (NULL == proc->proc_endpoints) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(proc);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    *usnic_proc = proc;
    return OPAL_SUCCESS;
}
/*
 * Look up (or create) the mca_btl_openib_proc_t for the given opal proc.
 * On first sight of a peer the peer's modex message is received and
 * unpacked: a port count, then per-port a fixed-size message struct
 * followed by a list of CPC descriptors, each optionally carrying an
 * opaque modex blob.
 *
 * Returns the proc struct, or NULL on error / empty modex.
 *
 * NOTE(review): several leak-prone paths here, fixed in the locked variant
 * of this function — confirm against upstream:
 *  - OBJ_NEW result is not checked for NULL;
 *  - the `0 == msg_size` path returns NULL without releasing module_proc;
 *  - mid-unpack failures return NULL without releasing module_proc;
 *  - the `message` buffer from OPAL_MODEX_RECV is never freed.
 */
mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc)
{
    mca_btl_openib_proc_t* module_proc = NULL;
    size_t msg_size;
    uint32_t size;
    int rc, i, j;
    void *message;
    char *offset;
    int modex_message_size;
    mca_btl_openib_modex_message_t dummy;

    /* Check if we have already created a IB proc
     * structure for this ompi process */
    module_proc = mca_btl_openib_proc_lookup_proc(proc);
    if (NULL != module_proc) {
        /* Gotcha! */
        return module_proc;
    }

    /* Oops! First time, gotta create a new IB proc
     * out of the opal_proc ... */
    module_proc = OBJ_NEW(mca_btl_openib_proc_t);
    /* Initialize number of peer */
    module_proc->proc_endpoint_count = 0;
    module_proc->proc_opal = proc;

    /* query for the peer address info */
    OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
                    proc, &message, &msg_size);
    if (OPAL_SUCCESS != rc) {
        BTL_ERROR(("[%s:%d] opal_modex_recv failed for peer %s",
                   __FILE__, __LINE__,
                   OPAL_NAME_PRINT(proc->proc_name)));
        OBJ_RELEASE(module_proc);
        return NULL;
    }
    if (0 == msg_size) {
        return NULL;
    }

    /* Message was packed in btl_openib_component.c; the format
       is listed in a comment in that file */
    /* size of the fixed (endian-convertible) prefix of each port entry */
    modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);

    /* Unpack the number of modules in the message */
    offset = (char *) message;
    unpack8(&offset, &(module_proc->proc_port_count));
    BTL_VERBOSE(("unpack: %d btls", module_proc->proc_port_count));
    if (module_proc->proc_port_count > 0) {
        module_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
            malloc(sizeof(mca_btl_openib_proc_modex_t) *
                   module_proc->proc_port_count);
    } else {
        module_proc->proc_ports = NULL;
    }

    /* Loop over unpacking all the ports */
    for (i = 0; i < module_proc->proc_port_count; i++) {
        /* Unpack the modex comment message struct */
        size = modex_message_size;
        memcpy(&(module_proc->proc_ports[i].pm_port_info), offset, size);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
        MCA_BTL_OPENIB_MODEX_MSG_NTOH(module_proc->proc_ports[i].pm_port_info);
#endif
        offset += size;
        BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d",
                     i, (int)(offset-((char*)message))));

        /* Unpack the number of CPCs that follow */
        unpack8(&offset, &(module_proc->proc_ports[i].pm_cpc_data_count));
        BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
                     i, module_proc->proc_ports[i].pm_cpc_data_count,
                     (int)(offset-((char*)message))));
        module_proc->proc_ports[i].pm_cpc_data =
            (opal_btl_openib_connect_base_module_data_t *)
            calloc(module_proc->proc_ports[i].pm_cpc_data_count,
                   sizeof(opal_btl_openib_connect_base_module_data_t));
        if (NULL == module_proc->proc_ports[i].pm_cpc_data) {
            return NULL;
        }

        /* Unpack the CPCs */
        for (j = 0; j < module_proc->proc_ports[i].pm_cpc_data_count; ++j) {
            uint8_t u8;
            opal_btl_openib_connect_base_module_data_t *cpcd;
            cpcd = module_proc->proc_ports[i].pm_cpc_data + j;
            /* CPC component is transmitted as a small index */
            unpack8(&offset, &u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
                         i, j, u8, (int)(offset-(char*)message)));
            cpcd->cbm_component =
                opal_btl_openib_connect_base_get_cpc_byindex(u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
                         i, j, cpcd->cbm_component->cbc_name));

            unpack8(&offset, &cpcd->cbm_priority);
            unpack8(&offset, &cpcd->cbm_modex_message_len);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
                         i, j, cpcd->cbm_priority, cpcd->cbm_modex_message_len,
                         (int)(offset-(char*)message)));

            /* optional opaque per-CPC blob follows the fixed fields */
            if (cpcd->cbm_modex_message_len > 0) {
                cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
                if (NULL == cpcd->cbm_modex_message) {
                    BTL_ERROR(("Failed to malloc"));
                    return NULL;
                }
                memcpy(cpcd->cbm_modex_message, offset,
                       cpcd->cbm_modex_message_len);
                offset += cpcd->cbm_modex_message_len;
                BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
                             i, j,
                             ((uint32_t*)cpcd->cbm_modex_message)[0],
                             ((uint32_t*)cpcd->cbm_modex_message)[1],
                             (int)(offset-((char*)message))));
            }
        }
    }

    if (0 == module_proc->proc_port_count) {
        module_proc->proc_endpoints = NULL;
    } else {
        module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
            malloc(module_proc->proc_port_count *
                   sizeof(mca_btl_base_endpoint_t*));
    }
    if (NULL == module_proc->proc_endpoints) {
        OBJ_RELEASE(module_proc);
        return NULL;
    }
    BTL_VERBOSE(("unpacking done!"));
    return module_proc;
}
/*
 * Look up (or create) the mca_btl_tcp_proc_t for the given opal proc.
 * On first sight of a peer the peer's exported TCP addresses are pulled
 * from the modex, validated, and the address-family tags are converted to
 * OS AF_* constants.  The whole operation runs under tcp_lock; the
 * do/while(0) + break pattern funnels all error paths to shared cleanup.
 *
 * Returns the proc struct, or NULL on failure.
 */
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
{
    mca_btl_tcp_proc_t* btl_proc;
    size_t size;
    int rc;

    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    rc = opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
                                   proc->proc_name, (void**)&btl_proc);
    if(OPAL_SUCCESS == rc) {
        /* already known: return the cached proc */
        OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
        return btl_proc;
    }

    do {
        btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
        if(NULL == btl_proc) {
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            break;
        }

        btl_proc->proc_opal = proc;
        /* hold a reference on the underlying opal proc while we use it */
        OBJ_RETAIN(btl_proc->proc_opal);

        /* lookup tcp parameters exported by this proc */
        OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version,
                        &proc->proc_name, (uint8_t**)&btl_proc->proc_addrs,
                        &size);
        if(rc != OPAL_SUCCESS) {
            /* OPAL_ERR_NOT_FOUND just means the peer doesn't run this BTL */
            if(OPAL_ERR_NOT_FOUND != rc)
                BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc));
            break;
        }

        /* the blob must be a whole number of address entries */
        if(0 != (size % sizeof(mca_btl_tcp_addr_t))) {
            BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n",
                       (unsigned long) size,
                       (unsigned long)sizeof(mca_btl_tcp_addr_t)));
            rc = OPAL_ERROR;
            break;
        }
        btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t);

        /* allocate space for endpoint array - one for each exported address */
        btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
            malloc((1 + btl_proc->proc_addr_count) *
                   sizeof(mca_btl_base_endpoint_t*));
        if(NULL == btl_proc->proc_endpoints) {
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            break;
        }

        /* remember our own proc struct the first time we see ourselves */
        if(NULL == mca_btl_tcp_component.tcp_local &&
           (proc == opal_proc_local_get())) {
            mca_btl_tcp_component.tcp_local = btl_proc;
        }

        /* convert the OPAL addr_family field to OS constants,
         * so we can check for AF_INET (or AF_INET6) and don't have
         * to deal with byte ordering anymore.
         */
        for (unsigned int i = 0; i < btl_proc->proc_addr_count; i++) {
            if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) {
                btl_proc->proc_addrs[i].addr_family = AF_INET;
            }
#if OPAL_ENABLE_IPV6
            if (MCA_BTL_TCP_AF_INET6 == btl_proc->proc_addrs[i].addr_family) {
                btl_proc->proc_addrs[i].addr_family = AF_INET6;
            }
#endif
        }
    } while (0);

    if (OPAL_SUCCESS == rc) {
        /* add to hash table of all proc instance. */
        opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs,
                                  proc->proc_name, btl_proc);
    } else {
        /* failure: drop the partially built proc (destructor releases the
         * retained opal proc and any received addresses) */
        if (btl_proc) {
            OBJ_RELEASE(btl_proc);
            btl_proc = NULL;
        }
    }

    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);

    return btl_proc;
}
/*
 * Record the Portals4 process id of every peer (variant with logical
 * rank-to-physical mapping).  Each peer's id is pulled from the modex and
 * stored as the proc's PORTALS4 endpoint; in logical mode the physical
 * pid/nid of every rank is collected into maptable and installed with
 * PtlSetMap().  Afterwards the interface is initialized and the progress
 * callback registered.
 *
 * NOTE(review): leak-prone paths to confirm against upstream:
 *  - in logical mode, every early `return` inside the loop (and the
 *    PtlSetMap failure path) leaks maptable;
 *  - modex_id, allocated by OPAL_MODEX_RECV, is never freed;
 *  - `me` is only assigned when the local proc is in `procs`, but is
 *    passed to flowctl_add_procs — presumably the local proc is always
 *    present; confirm.
 */
int ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
                                size_t nprocs,
                                struct ompi_proc_t** procs)
{
    int ret, me;
    size_t i;
    bool new_found = false;
    ptl_process_t *maptable;

    if (ompi_mtl_portals4.use_logical) {
        /* one physical id slot per global rank, handed to PtlSetMap below */
        maptable = malloc(sizeof(ptl_process_t) * nprocs);
        if (NULL == maptable) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: malloc failed\n", __FILE__, __LINE__);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* Get the list of ptl_process_id_t from the runtime and copy into structure */
    for (i = 0 ; i < nprocs ; ++i) {
        ptl_process_t *modex_id;
        size_t size;

        if( procs[i] == ompi_proc_local_proc ) {
            me = i;
        }

        if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Portals 4 MTL does not support heterogeneous operations.");
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Proc %s architecture %x, mine %x.",
                                OMPI_NAME_PRINT(&procs[i]->super.proc_name),
                                procs[i]->super.proc_arch,
                                ompi_proc_local()->super.proc_arch);
            return OMPI_ERR_NOT_SUPPORTED;
        }

        OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
                        &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        } else if (sizeof(ptl_process_t) != size) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_BAD_PARAM;
        }

        if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
            /* first time we see this peer: store its id as the endpoint */
            ptl_process_t *peer_id;
            peer_id = malloc(sizeof(ptl_process_t));
            if (NULL == peer_id) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: malloc failed: %d\n",
                                    __FILE__, __LINE__, ret);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            if (ompi_mtl_portals4.use_logical) {
                /* logical mode: the endpoint stores the rank; the physical
                 * pid/nid goes into the map table */
                peer_id->rank = i;
                maptable[i].phys.pid = modex_id->phys.pid;
                maptable[i].phys.nid = modex_id->phys.nid;
                opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
                                    "logical: global rank=%d pid=%d nid=%d\n",
                                    (int)i, maptable[i].phys.pid,
                                    maptable[i].phys.nid);
            } else {
                *peer_id = *modex_id;
            }
            procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id;
            new_found = true;
        } else {
            /* peer already known: check consistency with the stored id */
            ptl_process_t *proc = (ptl_process_t*)
                procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
            if (ompi_mtl_portals4.use_logical) {
                if ((size_t)proc->rank != i) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: existing peer and rank don't match\n",
                                        __FILE__, __LINE__);
                    return OMPI_ERROR;
                }
                maptable[i].phys.pid = modex_id->phys.pid;
                maptable[i].phys.nid = modex_id->phys.nid;
            }
            else if (proc->phys.nid != modex_id->phys.nid ||
                     proc->phys.pid != modex_id->phys.pid) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: existing peer and modex peer don't match\n",
                                    __FILE__, __LINE__);
                return OMPI_ERROR;
            }
        }
    }

    if (ompi_mtl_portals4.use_logical) {
        /* install the rank -> physical-id mapping on the NI */
        ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: logical mapping failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "logical mapping OK\n");
        free(maptable);
    }

    portals4_init_interface();

    /* activate progress callback */
    ret = opal_progress_register(ompi_mtl_portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    if (new_found) {
        ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: flowctl_add_procs failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }
    }
#endif

    return OMPI_SUCCESS;
}