/* * Look for an existing TCP process instance based on the globally unique * process identifier. */ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name) { mca_btl_tcp_proc_t* proc = NULL; OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, *name, (void**)&proc); OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); if (OPAL_UNLIKELY(NULL == proc)) { mca_btl_base_endpoint_t *endpoint; opal_proc_t *opal_proc; BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}", name->jobid, name->vpid)); opal_proc = opal_proc_for_name (*name); if (NULL == opal_proc) { return NULL; } /* try adding this proc to each btl until */ for( uint32_t i = 0; i < mca_btl_tcp_component.tcp_num_btls; ++i ) { endpoint = NULL; (void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc, &endpoint, NULL); if (NULL != endpoint && NULL == proc) { /* get the proc and continue on (could probably just break here) */ proc = endpoint->endpoint_proc; } } } return proc; }
/**
 * Progress at most one completed datagram on a uGNI module.
 *
 * The device is probed for a completed datagram. If one is found it is
 * waited on, the endpoint it belongs to is determined (directly from the
 * datagram id for directed datagrams, by remote proc name for the wildcard
 * endpoint), the connection state machine is advanced and, if the endpoint
 * is in the connected state, its pending SMSG messages are processed. The
 * wildcard datagram is reposted after a wildcard completion.
 *
 * @param ugni_module  module whose device and endpoints are progressed
 *
 * @return 0 when the probe does not report a completed datagram;
 *         the value of mca_btl_ugni_smsg_process() when the endpoint is
 *         connected (0 otherwise);
 *         an OPAL error code when waiting on the datagram fails or when no
 *         endpoint can be found/created for a wildcard peer.
 */
static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint64_t datagram_id, data, proc_id;
    uint32_t remote_addr, remote_id;
    mca_btl_base_endpoint_t *ep = NULL;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    gni_return_t grc;
    int count = 0, rc;

    /* check for datagram completion */
    OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
        return 0;
    }

    /* the bits outside of the datagram mask carry the payload; for directed
     * datagrams it is used below as an index into the module's endpoint array */
    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id,
                 (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        /* NOTE(review): the array item is dereferenced without a NULL check;
         * presumably a directed id always maps to a live endpoint -- confirm */
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
        handle = ep->smsg_ep_handle;
    } else {
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return opal_common_rc_ugni_to_opal (grc);
    }

    /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
    if (handle == ugni_module->wildcard_ep) {
        proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);

        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
                     proc_id));

        OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
        OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);

        /* check if the endpoint is known */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
            struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);

            BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
                         ugni_module->wc_remote_attr.proc_name.jobid,
                         ugni_module->wc_remote_attr.proc_name.vpid));

            ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
            if (OPAL_UNLIKELY(NULL == ep)) {
                BTL_ERROR(("could not find/allocate a btl endpoint for peer {jobid = 0x%x, vpid = 0x%x}",
                           ugni_module->wc_remote_attr.proc_name.jobid,
                           ugni_module->wc_remote_attr.proc_name.vpid));
                /* rc is OPAL_SUCCESS when the table lookup succeeded but held
                 * a NULL entry; never report that failure as success (0) */
                return (OPAL_SUCCESS != rc) ? rc : OPAL_ERR_NOT_FOUND;
            }
        }
    } else {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
                 data, (void *) ep, remote_id));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&ep->lock);
    if (handle != ugni_module->wildcard_ep) {
        /* directed post complete */
        ep->dg_posted = false;
    }

    (void) mca_btl_ugni_ep_connect_progress (ep);
    opal_mutex_unlock (&ep->lock);

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /* process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}
/**
 * Progress one completed datagram for the given device.
 *
 * Works on the module referenced by mca_btl_ugni_component.modules. A
 * datagram is fetched with mca_btl_ugni_get_datagram(); unless that call
 * returns 1 its return value is passed straight back to the caller. For a
 * wildcard completion the endpoint is obtained from the remote proc name
 * (the process aborts if that fails). The endpoint's connection is then
 * progressed under its lock, its SMSG mailbox is processed if it is in the
 * connected state, and the wildcard datagram is reposted when it was the
 * one that completed.
 *
 * @param device  device handed to mca_btl_ugni_get_datagram()
 *
 * @return the value returned by mca_btl_ugni_get_datagram() when it is not
 *         1; otherwise the result of mca_btl_ugni_smsg_process() for a
 *         connected endpoint, or 0.
 */
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_component.modules;
    mca_btl_base_endpoint_t *endpoint;
    gni_ep_handle_t dg_handle;
    int processed;
    int status;

    status = mca_btl_ugni_get_datagram (module, device, &dg_handle, &endpoint);
    if (status != 1) {
        return status;
    }

    BTL_VERBOSE(("remote datagram completion on handle %p", dg_handle));

    /* a completion on the wildcard endpoint only tells us the peer's name:
     * resolve it to a proc and fetch the matching endpoint */
    if (module->wildcard_ep == dg_handle) {
        struct opal_proc_t *peer = opal_proc_for_name (module->wc_remote_attr.proc_name);

        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc: %s",
                     OPAL_NAME_PRINT(module->wc_remote_attr.proc_name)));

        endpoint = mca_btl_ugni_get_ep (&module->super, peer);
        if (OPAL_UNLIKELY(NULL == endpoint)) {
            /* unrecoverable: report the peer and terminate */
            BTL_ERROR(("could not find/allocate a btl endpoint for peer %s",
                       OPAL_NAME_PRINT(module->wc_remote_attr.proc_name)));
            abort ();
            return OPAL_ERR_NOT_FOUND;
        }
    }

    /* an endpoint must be known by now */
    assert (NULL != endpoint);

    BTL_VERBOSE(("got a datagram completion: ep = %p. wc = %d", (void *) endpoint,
                 dg_handle == module->wildcard_ep));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&endpoint->lock);

    if (module->wildcard_ep != dg_handle) {
        /* the directed datagram posted on this endpoint has completed */
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) endpoint));

        endpoint->dg_posted = false;
        (void) opal_atomic_add_32 (&module->active_datagrams, -1);
    }

    (void) mca_btl_ugni_ep_connect_progress (endpoint);

    opal_mutex_unlock (&endpoint->lock);

    /* drain the endpoint's smsg mailbox only once it is connected */
    processed = (MCA_BTL_UGNI_EP_STATE_CONNECTED == endpoint->state)
        ? mca_btl_ugni_smsg_process (endpoint)
        : 0;

    /* the wildcard datagram was consumed, so post it again */
    if (module->wildcard_ep == dg_handle) {
        mca_btl_ugni_wildcard_ep_post (module);
    }

    return processed;
}