/**
 * Wait for the datagram identified by @a id to complete and classify it.
 *
 * @param iface   UDT interface that owns the wildcard endpoint/descriptor and
 *                the endpoint lookup table.
 * @param id      Datagram id. UCT_UGNI_UDT_ANY selects the interface's wildcard
 *                endpoint; any other value is looked up as an endpoint id.
 * @param ep_out  Set to the endpoint matching @a id; NULL for the wildcard id
 *                or when the lookup fails.
 *
 * @return UCS_OK            a non-payload (ack) datagram completed; the
 *                           endpoint's posted descriptor was returned to its
 *                           pool and its flush group was checked.
 *         UCS_INPROGRESS    a payload datagram completed; no descriptor is
 *                           released here.
 *         UCS_ERR_CANCELED  the post completed in the GNI_POST_TERMINATED state.
 *         UCS_ERR_IO_ERROR  the endpoint lookup failed, the GNI wait call
 *                           failed, or the post state was unexpected.
 */
static ucs_status_t recieve_datagram(uct_ugni_udt_iface_t *iface, uint64_t id, uct_ugni_udt_ep_t **ep_out)
{
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;
    uct_ugni_udt_ep_t *ep;
    gni_ep_handle_t gni_ep;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;

    ucs_trace_func("iface=%p, id=%lx", iface, id);

    if (UCT_UGNI_UDT_ANY == id) {
        ep     = NULL;
        gni_ep = iface->ep_any;
        desc   = iface->desc_any;
    } else {
        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                            uct_ugni_udt_ep_t);
        if (ucs_unlikely(NULL == ep)) {
            /* Without an endpoint there is neither a GNI handle nor a
             * descriptor to wait on; report instead of dereferencing NULL. */
            *ep_out = NULL;
            ucs_error("Can not lookup ep with id %lx", id);
            return UCS_ERR_IO_ERROR;
        }
        gni_ep = ep->super.ep;
        desc   = ep->posted_desc;
    }

    *ep_out = ep;

    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_EpPostDataWaitById(gni_ep, id, -1, &post_state, &rem_addr, &rem_id);
    uct_ugni_device_unlock(&iface->super.cdm);

    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, id=%lu Error status: %s %d",
                  id, gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_IO_ERROR;
    }

    if (GNI_POST_TERMINATED == post_state) {
        return UCS_ERR_CANCELED;
    }

    if (GNI_POST_COMPLETED != post_state) {
        ucs_error("GNI_EpPostDataWaitById gave unexpected response: %u", post_state);
        return UCS_ERR_IO_ERROR;
    }

    /* Only directed (per-endpoint) posts are counted as outstanding. */
    if (UCT_UGNI_UDT_ANY != id) {
        --iface->super.outstanding;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);

    ucs_trace("Got datagram id: %lu type: %i len: %i am_id: %i", id, header->type, header->length, header->am_id);

    if (UCT_UGNI_UDT_PAYLOAD != header->type) {
        /* ack message, no data */
        ucs_assert_always(NULL != ep);
        ucs_mpool_put(ep->posted_desc);
        uct_ugni_check_flush(ep->desc_flush_group);
        ep->posted_desc = NULL;
        return UCS_OK;
    }

    return UCS_INPROGRESS;
}
/**
 * Tear down the interface's wildcard datagram endpoint.
 *
 * Under the device lock this cancels the wildcard post, tests its state,
 * waits for it if it is still pending, and then destroys iface->ep_any.
 * If the cancel call fails, or the test call fails with anything other than
 * GNI_RC_NO_MATCH, the function logs the error and returns without destroying
 * the endpoint.
 *
 * @param iface  UDT interface whose wildcard endpoint is cleaned up.
 */
static void uct_ugni_udt_clean_wildcard(uct_ugni_udt_iface_t *iface)
{
    gni_return_t ugni_rc;
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;

    uct_ugni_device_lock(&iface->super.cdm);

    ugni_rc = GNI_EpPostDataCancelById(iface->ep_any, UCT_UGNI_UDT_ANY);
    if (GNI_RC_SUCCESS != ugni_rc) {
        uct_ugni_device_unlock(&iface->super.cdm);
        ucs_error("GNI_EpPostDataCancelById failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return;
    }

    ugni_rc = GNI_EpPostDataTestById(iface->ep_any, UCT_UGNI_UDT_ANY, &post_state, &rem_addr, &rem_id);
    if (GNI_RC_SUCCESS != ugni_rc) {
        /* GNI_RC_NO_MATCH is tolerated here; any other failure aborts. */
        if (GNI_RC_NO_MATCH != ugni_rc) {
            uct_ugni_device_unlock(&iface->super.cdm);
            ucs_error("GNI_EpPostDataTestById failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
            return;
        }
    } else if (GNI_POST_PENDING == post_state) {
        ugni_rc = GNI_EpPostDataWaitById(iface->ep_any, UCT_UGNI_UDT_ANY, -1, &post_state, &rem_addr, &rem_id);
        /* Report a failed drain instead of silently overwriting the status;
         * the endpoint is still destroyed below, as before. */
        if ((GNI_RC_SUCCESS != ugni_rc) && (GNI_RC_NO_MATCH != ugni_rc)) {
            ucs_error("GNI_EpPostDataWaitById failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
        }
    }

    ugni_rc = GNI_EpDestroy(iface->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpDestroy failed, Error status: %s %d\n",
                  gni_err_str[ugni_rc], ugni_rc);
    }

    uct_ugni_device_unlock(&iface->super.cdm);
}
/**
 * Progress at most one completed datagram on the module's device.
 *
 * Probes for a completed datagram, waits on the matching GNI endpoint handle
 * (the wildcard handle, or the handle of the endpoint indexed by the low 32
 * bits of the datagram id), advances connection setup for the endpoint indexed
 * by the remote id reported by the wait call, and drains that endpoint's SMSG
 * mailbox if it is connected. A completed wildcard datagram is reposted.
 *
 * @param ugni_module  uGNI BTL module to progress.
 *
 * @return 0 if no datagram had completed; a converted GNI error code if the
 *         wait failed; otherwise the value returned by
 *         mca_btl_ugni_smsg_process (0 if the endpoint is not connected).
 */
static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    mca_btl_base_endpoint_t *peer;
    gni_post_state_t state;
    gni_ep_handle_t ep_handle;
    uint32_t peer_addr, peer_id;
    uint64_t dgram_id;
    gni_return_t gni_rc;
    int is_wildcard;
    int processed = 0;

    /* nothing to do unless the probe reports a completed datagram */
    gni_rc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &dgram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != gni_rc)) {
        return 0;
    }

    is_wildcard = ((dgram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_WILDCARD_ID);

    /* pick the GNI endpoint handle the datagram was posted on */
    ep_handle = is_wildcard
        ? ugni_module->wildcard_ep
        : ugni_module->endpoints[(uint32_t)(dgram_id & 0xffffffffull)]->smsg_ep_handle;

    /* block until the datagram has fully completed */
    gni_rc = GNI_EpPostDataWaitById (ep_handle, dgram_id, -1, &state, &peer_addr, &peer_id);
    if (GNI_RC_SUCCESS != gni_rc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", gni_rc));
        return ompi_common_rc_ugni_to_ompi (gni_rc);
    }

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "peer = %d", dgram_id, state, peer_id));

    peer = ugni_module->endpoints[peer_id];

    /* NTH: TODO -- error handling */
    (void) mca_btl_ugni_ep_connect_progress (peer);

    /* a connected endpoint may already have messages in its smsg mailbox */
    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == peer->state) {
        processed = mca_btl_ugni_smsg_process (peer);
    }

    /* the wildcard datagram was consumed, so post it again */
    if (is_wildcard) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return processed;
}
/**
 * Progress at most one completed datagram on the module's device.
 *
 * The probe and the wait are performed while holding the device lock. A
 * directed datagram identifies its endpoint through the non-mask bits of the
 * datagram id; a wildcard datagram identifies the peer through the process
 * name stored in ugni_module->wc_remote_attr, which is resolved via the
 * id_to_endpoint hash table (under endpoint_lock) and, failing that, via
 * mca_btl_ugni_get_ep. Connection progress runs under the endpoint's lock.
 * A completed wildcard datagram is reposted before returning.
 *
 * @param ugni_module  uGNI BTL module to progress.
 *
 * @return 0 if no datagram had completed; a converted GNI error code if the
 *         wait failed; the hash-lookup return code if no endpoint could be
 *         obtained for a wildcard peer; otherwise the value returned by
 *         mca_btl_ugni_smsg_process (0 if the endpoint is not connected).
 */
static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    mca_btl_base_endpoint_t *ep;
    gni_ep_handle_t handle;
    gni_post_state_t state;
    gni_return_t gni_rc;
    uint32_t peer_addr, peer_id;
    uint64_t dgram_id, id_kind, id_data, peer_proc_id;
    int lookup_rc;
    int processed = 0;

    /* probe for a completed datagram */
    OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);  /* TODO: may not need lock for this function */
    gni_rc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &dgram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != gni_rc)) {
        OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
        return 0;
    }

    /* split the id into its kind (mask bits) and its data (remaining bits) */
    id_kind = dgram_id & MCA_BTL_UGNI_DATAGRAM_MASK;
    id_data = dgram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, dgram_id, (uint64_t) id_kind));

    if (MCA_BTL_UGNI_CONNECT_DIRECTED_ID != id_kind) {
        handle = ugni_module->wildcard_ep;
    } else {
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, id_data);
        handle = ep->smsg_ep_handle;
    }

    /* block until the datagram has fully completed, then drop the device lock */
    gni_rc = GNI_EpPostDataWaitById (handle, dgram_id, -1, &state, &peer_addr, &peer_id);
    OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
    if (GNI_RC_SUCCESS != gni_rc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", gni_rc));
        return opal_common_rc_ugni_to_opal (gni_rc);
    }

    if (handle != ugni_module->wildcard_ep) {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
    } else {
        /* wildcard completion: resolve the sender from the received process name */
        peer_proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);

        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
                     peer_proc_id));

        OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
        lookup_rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, peer_proc_id, (void **) &ep);
        OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);

        /* peer not in the table: obtain an endpoint for it via mca_btl_ugni_get_ep */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != lookup_rc || NULL == ep)) {
            struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);

            BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
                         ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));

            ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
            if (OPAL_UNLIKELY(NULL == ep)) {
                return lookup_rc;
            }
        }
    }

    /* an endpoint must be known by now */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", dgram_id, state,
                 id_data, (void *) ep, peer_id));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&ep->lock);
    if (handle != ugni_module->wildcard_ep) {
        /* the endpoint's directed post has completed */
        ep->dg_posted = false;
    }
    (void) mca_btl_ugni_ep_connect_progress (ep);
    opal_mutex_unlock (&ep->lock);

    /* a connected endpoint may already have messages in its smsg mailbox */
    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        processed = mca_btl_ugni_smsg_process (ep);
    }

    /* the wildcard datagram was consumed, so post it again */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return processed;
}
static void uct_ugni_udt_progress(void *arg) { uint32_t rem_addr, rem_id; uint64_t id; void *payload; void *user_desc; ucs_status_t status; uct_ugni_udt_desc_t *desc; uct_ugni_udt_header_t *header; uct_ugni_udt_iface_t * iface = (uct_ugni_udt_iface_t *)arg; uct_ugni_udt_ep_t *ep; gni_ep_handle_t ugni_ep; gni_post_state_t post_state; gni_return_t ugni_rc; pthread_mutex_lock(&uct_ugni_global_lock); ugni_rc = GNI_PostDataProbeById(iface->super.nic_handle, &id); if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { if (GNI_RC_NO_MATCH != ugni_rc) { ucs_error("GNI_PostDataProbeById , Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); } goto exit; } if (UCT_UGNI_UDT_ANY == id) { /* New incomming message */ ep = NULL; ugni_ep = iface->ep_any; desc = iface->desc_any; } else { /* Ack message */ ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id), uct_ugni_udt_ep_t); if (ucs_unlikely(NULL == ep)) { ucs_error("Can not lookup ep with id %"PRIx64,id); goto exit; } ugni_ep = ep->super.ep; desc = ep->posted_desc; } ugni_rc = GNI_EpPostDataWaitById(ugni_ep, id, -1, &post_state, &rem_addr, &rem_id); if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { ucs_error("GNI_EpPostDataWaitById, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); goto exit; } header = uct_ugni_udt_get_rheader(desc, iface); payload = uct_ugni_udt_get_rpayload(desc, iface); user_desc = uct_ugni_udt_get_user_desc(desc, iface); if (UCT_UGNI_UDT_ANY == id) { /* New incomming message */ ucs_assert_always(header->type == UCT_UGNI_UDT_PAYLOAD); uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV, header->am_id, payload, header->length, "RX: AM"); status = uct_iface_invoke_am(&iface->super.super, header->am_id, payload, header->length, user_desc); if (UCS_OK != status) { uct_ugni_udt_desc_t *new_desc; /* set iface for a later release call */ uct_recv_desc_iface(user_desc) = &iface->super.super.super; /* Allocate a new element */ UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, new_desc, 
goto exit); /* set the new desc */ iface->desc_any = new_desc; }
/**
 * Progress at most one completed datagram on the module's device.
 *
 * A directed datagram identifies its endpoint through the non-mask bits of the
 * datagram id. A wildcard datagram identifies the peer through
 * ugni_module->wc_remote_attr.proc_id, which is resolved via the
 * id_to_endpoint hash table. After the endpoint is known, connection setup is
 * advanced and, if the endpoint is connected, its SMSG mailbox is processed.
 * A completed wildcard datagram is reposted before returning.
 *
 * @param ugni_module  uGNI BTL module to progress.
 *
 * @return 0 if no datagram had completed; a converted GNI error code if the
 *         wait failed; OMPI_ERR_NOT_FOUND if a wildcard datagram came from a
 *         peer that is not in the hash table; otherwise the value returned by
 *         mca_btl_ugni_smsg_process (0 if the endpoint is not connected).
 */
static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint32_t remote_addr, remote_id;
    uint64_t datagram_id, data;
    /* Initialized so the unknown-peer error path never logs an indeterminate
     * pointer when the hash lookup fails without writing its output. */
    mca_btl_base_endpoint_t *ep = NULL;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    gni_return_t grc;
    int count = 0, rc;

    /* check for datagram completion */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        return 0;
    }

    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id,
                 (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
        handle = ep->smsg_ep_handle;
    } else {
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
    if (handle == ugni_module->wildcard_ep) {
        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
                     ugni_module->wc_remote_attr.proc_id));
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
                                               ugni_module->wc_remote_attr.proc_id,
                                               (void **) &ep);
        /* check if the endpoint is known */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
            BTL_ERROR(("received connection attempt from an unknown peer. rc: %d, ep: %p, id: 0x%" PRIx64,
                       rc, (void *) ep, ugni_module->wc_remote_attr.proc_id));
            return OMPI_ERR_NOT_FOUND;
        }
    } else {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
                 data, (void *) ep, remote_id));

    /* NTH: TODO -- error handling */
    (void) mca_btl_ugni_ep_connect_progress (ep);

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /* process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}