/**
 * Finish connecting an endpoint once the peer's attributes are available.
 *
 * Initializes the SMSG protocol on the endpoint's SMSG handle using the local
 * mailbox attributes and the attributes stored in ep->remote_attr, sets the
 * endpoint event data, marks the endpoint connected, bumps the device's SMSG
 * connection counter, and then attempts to post the fragments queued on the
 * endpoint. If that attempt fails the endpoint is put on the module's endpoint
 * wait list (unless it is already there).
 *
 * @param[in,out] ep  endpoint being connected; ep->remote_attr is read and,
 *                    on the success path, freed and reset to NULL
 *
 * @returns OPAL_SUCCESS once the endpoint has been marked connected (even when
 *          the queued sends could not all be posted), or the translation of
 *          the GNI_SmsgInit error. On the error path ep->remote_attr is left
 *          untouched.
 */
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
    gni_return_t init_status;
    int flush_rc;

    BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d",
                 ep->remote_attr->smsg_attr.msg_type,
                 ep->remote_attr->smsg_attr.msg_buffer,
                 ep->remote_attr->smsg_attr.buff_size,
                 ep->remote_attr->smsg_attr.mem_hndl.qword1,
                 ep->remote_attr->smsg_attr.mem_hndl.qword2,
                 ep->remote_attr->smsg_attr.mbox_offset,
                 ep->remote_attr->smsg_attr.mbox_maxcredit,
                 ep->remote_attr->smsg_attr.msg_maxsize));

    BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d",
                 ep->mailbox->attr.smsg_attr.msg_type,
                 ep->mailbox->attr.smsg_attr.msg_buffer,
                 ep->mailbox->attr.smsg_attr.buff_size,
                 ep->mailbox->attr.smsg_attr.mem_hndl.qword1,
                 ep->mailbox->attr.smsg_attr.mem_hndl.qword2,
                 ep->mailbox->attr.smsg_attr.mbox_offset,
                 ep->mailbox->attr.smsg_attr.mbox_maxcredit,
                 ep->mailbox->attr.smsg_attr.msg_maxsize));

    init_status = GNI_SmsgInit (ep->smsg_ep_handle.gni_handle,
                                &ep->mailbox->attr.smsg_attr,
                                &ep->remote_attr->smsg_attr);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != init_status)) {
        BTL_ERROR(("error initializing SMSG protocol. rc = %d", init_status));
        return mca_btl_rc_ugni_to_opal (init_status);
    }

    /* Local event data is this endpoint's own index; remote event data is the
     * index the peer uses for us. With both set, a completion can be mapped to
     * its endpoint with one lookup in the endpoints array. The peer's index in
     * that array can no longer be changed after this call. */
    GNI_EpSetEventData (ep->smsg_ep_handle.gni_handle, ep->index, ep->remote_attr->index);

    ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
    ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
    (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, 1);

    /* push out everything that was queued while the connection was pending */
    BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));

    flush_rc = mca_btl_ugni_progress_send_wait_list (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != flush_rc)) {
        /* not everything could be posted: park the endpoint on the module's
         * wait list, taking care not to insert it twice */
        OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
        if (!ep->wait_listed) {
            opal_list_append (&ugni_module->ep_wait_list, &ep->super);
            ep->wait_listed = true;
        }
        OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
    }

    /* the peer's attributes are no longer needed */
    free (ep->remote_attr);
    ep->remote_attr = NULL;

    return OPAL_SUCCESS;
}
/**
 * Read one NIC statistic from every virtual device.
 *
 * The statistic to read is taken from pvar->ctx. One counter per virtual
 * device is written to value, which is treated as an array of unsigned int
 * with at least mca_btl_ugni_component.virtual_device_count elements. The
 * devices queried are those of the first module (modules[0]).
 *
 * All devices are queried even if one of them fails; the first failure seen is
 * the one reported, so an error on an early device is not hidden by a later
 * device succeeding.
 *
 * @param[in]  pvar   performance variable; its ctx field holds the statistic id
 * @param[out] value  array receiving one counter per virtual device
 * @param[in]  obj    unused
 *
 * @returns the translation (mca_btl_rc_ugni_to_opal) of GNI_RC_SUCCESS if
 *          every query succeeded, otherwise of the first failing return code
 */
static inline int mca_btl_ugni_get_stat (const mca_base_pvar_t *pvar, void *value, void *obj)
{
    gni_statistic_t statistic = (gni_statistic_t) (intptr_t) pvar->ctx;
    unsigned int *counters = (unsigned int *) value;
    gni_return_t first_error = GNI_RC_SUCCESS;

    (void) obj;

    for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
        gni_return_t rc = GNI_GetNicStat (mca_btl_ugni_component.modules[0].devices[i].dev_handle,
                                          statistic, counters + i);
        if (GNI_RC_SUCCESS != rc && GNI_RC_SUCCESS == first_error) {
            /* remember the first failure but keep filling the remaining slots */
            first_error = rc;
        }
    }

    return mca_btl_rc_ugni_to_opal (first_error);
}
/**
 * Post the wildcard datagram on the module's wildcard endpoint.
 *
 * Both the local and the remote wildcard attribute buffers of the module are
 * zeroed before the datagram is posted with the wildcard connection id.
 *
 * @param[in,out] ugni_module  module owning the wildcard endpoint and buffers
 *
 * @returns the result of GNI_EpPostDataWId translated by
 *          mca_btl_rc_ugni_to_opal
 */
int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module)
{
    gni_return_t post_status;

    BTL_VERBOSE(("posting wildcard datagram"));

    /* start from clean send and receive buffers */
    memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
    memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));

    post_status = GNI_EpPostDataWId (ugni_module->wildcard_ep,
                                     &ugni_module->wc_local_attr,
                                     sizeof (ugni_module->wc_local_attr),
                                     &ugni_module->wc_remote_attr,
                                     sizeof (ugni_module->wc_remote_attr),
                                     MCA_BTL_UGNI_CONNECT_WILDCARD_ID);

    return mca_btl_rc_ugni_to_opal (post_status);
}
/**
 * Initialize an endpoint handle: create a uGNI endpoint on the given device
 * and completion queue, then bind it to the remote peer of ep.
 *
 * @param[in]  ep         endpoint supplying the remote address and id
 * @param[in]  cq         completion queue to associate with the new handle
 * @param[in]  device     device on which the handle is created; also recorded
 *                        in ep_handle->device
 * @param[out] ep_handle  handle structure to fill in
 *
 * @returns the translated (mca_btl_rc_ugni_to_opal) result of GNI_EpCreate if
 *          creation failed, otherwise the translated result of GNI_EpBind
 */
int mca_btl_ugni_ep_handle_init (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
                                 mca_btl_ugni_device_t *device, mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    gni_return_t status;

    ep_handle->device = device;

    status = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
    if (GNI_RC_SUCCESS != status) {
        /* no handle was created, so there is nothing to bind */
        return mca_btl_rc_ugni_to_opal (status);
    }

    /* attach the freshly created handle to the remote peer */
    status = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);

    return mca_btl_rc_ugni_to_opal (status);
}
/**
 * Apply the configured maximum SMSG retransmit count to every virtual device
 * of a module.
 *
 * Stops at the first device for which GNI_SmsgSetMaxRetrans fails.
 *
 * @param[in] ugni_module  module whose devices are configured
 *
 * @returns OPAL_SUCCESS if every device accepted the setting, otherwise the
 *          translated (mca_btl_rc_ugni_to_opal) error of the failing call
 */
int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
{
    for (int dev_idx = 0 ; dev_idx < mca_btl_ugni_component.virtual_device_count ; ++dev_idx) {
        gni_return_t status = GNI_SmsgSetMaxRetrans (ugni_module->devices[dev_idx].dev_handle,
                                                     mca_btl_ugni_component.smsg_max_retries);
        if (GNI_RC_SUCCESS == status) {
            continue;
        }

        BTL_ERROR(("error setting maximum SMSG retries %s", gni_err_str[status]));
        return mca_btl_rc_ugni_to_opal (status);
    }

    return OPAL_SUCCESS;
}
/**
 * Send a disconnect message to the peer of an endpoint.
 *
 * The empty, disconnect-tagged SMSG message is retried for as long as the send
 * reports GNI_RC_NOT_DONE; between attempts the remote SMSG completion queue
 * of the endpoint's module is progressed. There is no retry limit.
 *
 * @param[in] ep  endpoint to send the disconnect on
 *
 * @returns the final send result translated by mca_btl_rc_ugni_to_opal
 */
static int mca_btl_ugni_ep_send_disconnect (mca_btl_base_endpoint_t *ep)
{
    int send_rc;

    for (;;) {
        send_rc = mca_btl_ugni_endpoint_smsg_send_wtag (ep, NULL, 0, NULL, 0, -1,
                                                        MCA_BTL_UGNI_TAG_DISCONNECT);
        if (GNI_RC_NOT_DONE == send_rc) {
            /* probably out of credits: poll the remote CQ so that returned
             * credits are picked up, then try again */
            (void) mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_ep_btl (ep));
            continue;
        }

        break;
    }

    return mca_btl_rc_ugni_to_opal (send_rc);
}
/**
 * Poll the module's remote SMSG completion queue once and act on the event.
 *
 * At most one event is fetched per call. The instance id carried by the event
 * is used as an index into btl->endpoints to find the endpoint, whose SMSG
 * mailbox is then processed.
 *
 * @param[in] btl  module whose remote SMSG completion queue is polled
 *
 * @returns 0 when no event was available, when the event's instance id does
 *          not map to a registered endpoint, or when the endpoint is not in
 *          the connected state; the result of
 *          mca_btl_ugni_handle_remote_smsg_overrun when a CQ overrun (or
 *          GNI_RC_ERROR_RESOURCE) is detected; the translated GNI error for
 *          any other failure (after asserting in debug builds); otherwise the
 *          result of mca_btl_ugni_smsg_process for the endpoint.
 */
int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
{
    mca_btl_base_endpoint_t *ep;
    gni_cq_entry_t event_data;
    gni_return_t grc;
    uint64_t inst_id;

    grc = mca_btl_ugni_gni_cq_get_event (btl->devices, btl->smsg_remote_cq, &event_data);
    if (GNI_RC_NOT_DONE == grc) {
        /* nothing pending on the queue */
        return 0;
    }

    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc || !GNI_CQ_STATUS_OK(event_data) || GNI_CQ_OVERRUN(event_data))) {
        if (GNI_RC_ERROR_RESOURCE == grc || (GNI_RC_SUCCESS == grc && GNI_CQ_OVERRUN(event_data))) {
            /* recover from smsg cq overrun */
            return mca_btl_ugni_handle_remote_smsg_overrun (btl);
        }

        BTL_ERROR(("unhandled error in GNI_CqGetEvent"));

        /* unhandled error: crash */
        assert (0);
        return mca_btl_rc_ugni_to_opal (grc);
    }

    BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64 ". ok = %d, type = %" PRIu64,
                 (uint64_t) event_data, GNI_CQ_GET_INST_ID(event_data), GNI_CQ_STATUS_OK(event_data),
                 GNI_CQ_GET_TYPE(event_data)));

    inst_id = GNI_CQ_GET_INST_ID(event_data);

    ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&btl->endpoints, inst_id);
    if (OPAL_UNLIKELY(NULL == ep)) {
        /* the lookup can come back empty if no endpoint is registered under
         * this index; ignore the event rather than dereference NULL */
        BTL_VERBOSE(("event occurred on an unknown endpoint! inst id = %" PRIu64, inst_id));
        return 0;
    }

    if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTED != ep->state)) {
        /* due to the nature of datagrams we may get a smsg completion before
           we get mailbox info from the peer */
        BTL_VERBOSE(("event occurred on an unconnected endpoint! ep state = %d", ep->state));
        return 0;
    }

    return mca_btl_ugni_smsg_process (ep);
}
/**
 * Post a directed datagram carrying this endpoint's mailbox attributes to its
 * remote peer.
 *
 * The local mailbox attributes are sent and the peer's reply is received into
 * ep->remote_attr. The datagram id combines the directed-connection id with
 * the endpoint's index. On a successful post the module's count of active
 * datagrams is incremented atomically.
 *
 * @param[in,out] ep  endpoint to post the datagram for
 *
 * @returns the result of GNI_EpPostDataWId translated by
 *          mca_btl_rc_ugni_to_opal
 */
static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
    gni_return_t post_status;

    BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->ep_rem_id, (void *)ep));

    /* only the first device has the irq cq attached, so advertise its handle */
    ep->mailbox->attr.rmt_irq_mem_hndl = ugni_module->devices[0].smsg_irq_mhndl;

    post_status = GNI_EpPostDataWId (ep->smsg_ep_handle.gni_handle,
                                     &ep->mailbox->attr, sizeof (ep->mailbox->attr),
                                     ep->remote_attr, sizeof (*ep->remote_attr),
                                     MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);

    if (OPAL_LIKELY(GNI_RC_SUCCESS == post_status)) {
        /* one more datagram is now outstanding on this module */
        (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1);
    }

    return mca_btl_rc_ugni_to_opal (post_status);
}