/**
 * Create a uGNI endpoint handle on the device of a common endpoint and bind
 * it to that endpoint's remote peer.
 *
 * Every uGNI call is made with cep->dev->dev_lock held; the lock is dropped
 * between the individual calls.
 *
 * @param cep        common endpoint supplying the device and the remote
 *                   address/id; NULL trips an assert and yields
 *                   OPAL_ERR_BAD_PARAM
 * @param cq         completion queue handed to GNI_EpCreate
 * @param ep_handle  receives the new handle; when binding fails the handle
 *                   is destroyed again before returning
 *
 * @return OPAL_SUCCESS, or the OPAL translation of the failing uGNI code
 */
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
                                gni_ep_handle_t *ep_handle)
{
    gni_return_t status;

    if (OPAL_UNLIKELY(NULL == cep)) {
        assert (0);
        return OPAL_ERR_BAD_PARAM;
    }

    /* step 1: allocate the endpoint handle */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    status = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
        return opal_common_rc_ugni_to_opal (status);
    }

    /* step 2: attach the handle to the remote peer */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    status = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    if (GNI_RC_SUCCESS == status) {
        return OPAL_SUCCESS;
    }

    /* binding failed: release the handle created in step 1 */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    GNI_EpDestroy (*ep_handle);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    return opal_common_rc_ugni_to_opal (status);
}
/*
 * Endpoint definition
 *
 * Initializer for uct_ugni_ep_t. Takes the interface from params->iface,
 * obtains a flush group, creates the GNI endpoint on the interface's NIC
 * handle and local CQ (under the CDM lock), derives the endpoint hash key
 * and registers the endpoint in the interface's endpoint hash.
 *
 * Returns UCS_OK on success, UCS_ERR_NO_DEVICE when GNI_EpCreate fails.
 */
UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, const uct_ep_params_t *params)
{
    uct_ugni_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_iface_t);
    gni_return_t ugni_rc;
    uint32_t *big_hash;

    self->arb_sched = 0;
    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);

    self->flush_group = uct_ugni_new_flush_group(iface);
#ifdef DEBUG
    self->flush_group->flush_comp.func = NULL;
    self->flush_group->parent = NULL;
#endif

    uct_ugni_cdm_lock(&iface->cdm);
    ugni_rc = GNI_EpCreate(uct_ugni_iface_nic_handle(iface), iface->local_cq,
                           &self->ep);
    uct_ugni_cdm_unlock(&iface->cdm);
    if (GNI_RC_SUCCESS != ugni_rc) {
        /* The failing call is GNI_EpCreate; the message used to blame
         * GNI_CdmCreate, which sends whoever reads the log to the wrong place.
         * NOTE(review): self->flush_group is not handed back on this path --
         * confirm whether it needs to be released here. */
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_NO_DEVICE;
    }

    ucs_arbiter_group_init(&self->arb_group);

    /* The hash key is the first 32 bits of the storage of the GNI endpoint
     * handle itself. */
    big_hash = (void *)&self->ep;
    self->hash_key = big_hash[0];
    if (uct_ugni_check_device_type(iface, GNI_DEVICE_ARIES)) {
        /* On Aries devices only the low 24 bits of the key are kept. */
        self->hash_key &= 0x00FFFFFF;
    }

    ucs_debug("Adding ep hash %x to iface %p", self->hash_key, iface);
    sglib_hashed_uct_ugni_ep_t_add(iface->eps, self);

    return UCS_OK;
}
/*
 * Endpoint definition
 *
 * Initializer for uct_ugni_ep_t. Creates the GNI endpoint on the interface's
 * NIC handle and local CQ, connects it to the peer when an address is given,
 * derives the endpoint hash key and registers the endpoint in the
 * interface's endpoint hash.
 *
 * tl_iface  interface the endpoint belongs to
 * addr      peer address (treated as uct_sockaddr_ugni_t); may be NULL, in
 *           which case the endpoint is created unconnected
 *
 * Returns UCS_ERR_NO_DEVICE when GNI_EpCreate fails, otherwise the result of
 * ugni_connect_ep() (UCS_OK when addr is NULL).
 */
UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, uct_iface_t *tl_iface,
                    const struct sockaddr *addr)
{
    uct_ugni_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_iface_t);
    const uct_sockaddr_ugni_t *iface_addr = (const uct_sockaddr_ugni_t*)addr;
    ucs_status_t rc = UCS_OK;
    gni_return_t ugni_rc;
    uint32_t *big_hash;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);

    ugni_rc = GNI_EpCreate(iface->nic_handle, iface->local_cq, &self->ep);
    if (GNI_RC_SUCCESS != ugni_rc) {
        /* The failing call is GNI_EpCreate; the message used to blame
         * GNI_CdmCreate. */
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_NO_DEVICE;
    }

    if (NULL != addr) {
        /* NOTE(review): when this fails the endpoint is still added to
         * iface->eps below and the GNI handle is not destroyed, yet the error
         * is returned to the caller -- confirm who tears this down. */
        rc = ugni_connect_ep(iface, iface_addr, self);
    }

    ucs_arbiter_group_init(&self->arb_group);

    /* The hash key is the first 32 bits of the storage of the GNI endpoint
     * handle itself. */
    big_hash = (void *)&self->ep;
    self->hash_key = big_hash[0];
    sglib_hashed_uct_ugni_ep_t_add(iface->eps, self);

    return rc;
}
/**
 * Set up an endpoint handle: record its device, create the uGNI endpoint on
 * that device and bind it to the remote peer described by ep.
 *
 * @param ep         BTL endpoint providing the remote address and id
 * @param cq         completion queue handed to GNI_EpCreate
 * @param device     device that will own the handle
 * @param ep_handle  handle to fill in
 *
 * @return OPAL translation of the uGNI code of the create call if it failed,
 *         otherwise of the bind call
 */
int mca_btl_ugni_ep_handle_init (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
                                 mca_btl_ugni_device_t *device,
                                 mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    gni_return_t status;

    ep_handle->device = device;

    status = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
    if (GNI_RC_SUCCESS != status) {
        /* nothing to bind without an endpoint */
        return mca_btl_rc_ugni_to_opal (status);
    }

    /* attach the fresh endpoint to the remote peer */
    status = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);

    return mca_btl_rc_ugni_to_opal (status);
}
/**
 * Initialize a uGNI BTL module and bind it to a device.
 *
 * Copies the module template, resets the counters, constructs the module's
 * lists, free lists, pointer arrays and hash table, links module and device
 * to each other, creates the wildcard endpoint and posts the wildcard
 * datagram.
 *
 * @param ugni_module  module storage to initialize
 * @param dev          device to bind; its btl_ctx is pointed at the module
 *
 * @return OMPI_SUCCESS, the OMPI translation of the GNI_EpCreate failure, or
 *         the error returned by mca_btl_ugni_wildcard_ep_post()
 */
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
                              ompi_common_ugni_device_t *dev)
{
    int rc;

    BTL_VERBOSE(("binding module %p to device %p", (void *) ugni_module,
                 (void *) dev));

    /* copy module defaults (and function pointers) */
    memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));

    ugni_module->initialized = false;
    ugni_module->nlocal_procs = 0;
    ugni_module->active_send_count = 0;

    OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
    OBJ_CONSTRUCT(&ugni_module->eager_frags_send, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->eager_frags_recv, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->smsg_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->rdma_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->rdma_int_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
    OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
    OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
    OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);

    ugni_module->device = dev;
    dev->btl_ctx = (void *) ugni_module;

    /* create wildcard endpoint to listen for connections.
     * there is no need to bind this endpoint. */
    rc = GNI_EpCreate (ugni_module->device->dev_handle, NULL,
                       &ugni_module->wildcard_ep);
    /* GNI_EpCreate reports a uGNI code, not an OMPI one. Translate it before
     * comparing with OMPI_SUCCESS instead of relying on the two "success"
     * values happening to be equal. This assumes the translator maps uGNI
     * success to OMPI_SUCCESS. */
    rc = ompi_common_rc_ugni_to_ompi (rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        BTL_ERROR(("error creating wildcard ugni endpoint"));
        return rc;
    }

    /* post wildcard datagram */
    rc = mca_btl_ugni_wildcard_ep_post (ugni_module);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        BTL_ERROR(("error posting wildcard datagram"));
        return rc;
    }

    return OMPI_SUCCESS;
}
/*
 * Post a cancel datagram (id UCT_UGNI_UDT_CANCEL) on a temporary endpoint
 * bound to this interface's own device address and domain id, then destroy
 * that endpoint. The whole sequence runs with the device lock held; failures
 * are logged and otherwise ignored.
 *
 * Preconditions -- the caller MUST already have:
 *   A) deregistered the datagram processing function from the async thread,
 *   B) cancelled the wildcard datagram,
 *   C) drained all other messages from the queue.
 */
static inline void uct_ugni_udt_terminate_thread(uct_ugni_udt_iface_t *iface)
{
    gni_ep_handle_t cancel_ep;
    gni_return_t status;

    uct_ugni_device_lock(&iface->super.cdm);

    status = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(iface),
                          iface->super.local_cq, &cancel_ep);
    if (status != GNI_RC_SUCCESS) {
        uct_ugni_device_unlock(&iface->super.cdm);
        ucs_error("GNI_EpCreate, Error status: %s %d",
                  gni_err_str[status], status);
        return;
    }

    status = GNI_EpBind(cancel_ep, iface->super.cdm.dev->address,
                        iface->super.cdm.domain_id);
    if (status != GNI_RC_SUCCESS) {
        GNI_EpDestroy(cancel_ep);
        uct_ugni_device_unlock(&iface->super.cdm);
        ucs_error("GNI_EpBind failed, Error status: %s %d",
                  gni_err_str[status], status);
        return;
    }

    /* An empty datagram is enough; only its id matters. A failed post is
     * reported but the endpoint is still torn down below. */
    status = GNI_EpPostDataWId(cancel_ep, NULL, 0, NULL, 0, UCT_UGNI_UDT_CANCEL);
    if (status != GNI_RC_SUCCESS) {
        ucs_error("Couldn't send cancel message to UGNI interface! %s %d",
                  gni_err_str[status], status);
    }

    /* When the gni_ep is destroyed the above post will be canceled */
    status = GNI_EpDestroy(cancel_ep);
    uct_ugni_device_unlock(&iface->super.cdm);
    if (status != GNI_RC_SUCCESS) {
        ucs_error("GNI_EpDestroy failed, Error status: %s %d\n",
                  gni_err_str[status], status);
    }
}
/*
 * Initializer for the uGNI UDT interface.
 *
 * In order: initializes the base uGNI interface, creates the event pipe and
 * the descriptor memory pool, creates the wildcard GNI endpoint, takes the
 * first wildcard descriptor and posts it, registers the event pipe with the
 * worker's async context, and finally starts the device thread.
 *
 * Returns UCS_OK on success. On any failure everything acquired so far is
 * released in reverse order, "Failed to activate interface" is logged and a
 * non-UCS_OK status is returned.
 */
static UCS_CLASS_INIT_FUNC(uct_ugni_udt_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    uct_ugni_udt_desc_t *desc;
    gni_return_t ugni_rc;
    int rc;

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_udt_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.udt_seg_size = GNI_DATAGRAM_MAXSIZE;
    self->config.rx_headroom  = params->rx_headroom;
    self->release_desc.cb     = uct_ugni_udt_iface_release_desc;

    status = ucs_async_pipe_create(&self->event_pipe);
    if (UCS_OK != status) {
        ucs_error("Pipe creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            uct_ugni_udt_get_diff(self) + self->config.udt_seg_size * 2,
                            uct_ugni_udt_get_diff(self),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_udt_desc_mpool_ops,
                            "UGNI-UDT-DESC");
    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_pipe;
    }

    ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(self), NULL, &self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        status = UCS_ERR_NO_DEVICE;
        goto clean_free_desc;
    }

    /* The failure action below only jumps to clean_ep. Without this
     * assignment status would still be UCS_OK from ucs_mpool_init() and a
     * torn-down interface would be reported as successfully created. */
    status = UCS_ERR_NO_MEMORY;
    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc, desc,
                             goto clean_ep);
    ucs_debug("First wildcard desc is %p", desc);

    /* Init any desc */
    self->desc_any = desc;
    status = uct_ugni_udt_ep_any_post(self);
    if (UCS_OK != status) {
        /* We can't continue if we can't post the first receive */
        ucs_error("Failed to post wildcard request");
        goto clean_any_desc;
    }

    status = ucs_async_set_event_handler(self->super.super.worker->async->mode,
                                         ucs_async_pipe_rfd(&self->event_pipe),
                                         POLLIN,
                                         uct_ugni_proccess_datagram_pipe,
                                         self, self->super.super.worker->async);
    if (UCS_OK != status) {
        goto clean_cancel_desc;
    }

    pthread_mutex_init(&self->device_lock, NULL);
    pthread_cond_init(&self->device_condition, NULL);
    self->events_ready = 0;

    rc = pthread_create(&self->event_thread, NULL, uct_ugni_udt_device_thread, self);
    if (0 != rc) {
        /* status is UCS_OK at this point; report the failure explicitly */
        status = UCS_ERR_IO_ERROR;
        goto clean_remove_event;
    }

    return UCS_OK;

clean_remove_event:
    /* Only reached after the synchronization objects were initialized and
     * the pipe handler was registered: undo exactly those two steps. The
     * pipe itself is destroyed once, at clean_pipe. */
    pthread_cond_destroy(&self->device_condition);
    pthread_mutex_destroy(&self->device_lock);
    ucs_async_remove_handler(ucs_async_pipe_rfd(&self->event_pipe), 1);
clean_cancel_desc:
    uct_ugni_udt_clean_wildcard(self);
clean_any_desc:
    ucs_mpool_put(self->desc_any);
clean_ep:
    ugni_rc = GNI_EpDestroy(self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_warn("GNI_EpDestroy failed, Error status: %s %d",
                 gni_err_str[ugni_rc], ugni_rc);
    }
clean_free_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
clean_pipe:
    ucs_async_pipe_destroy(&self->event_pipe);
exit:
    uct_ugni_cleanup_base_iface(&self->super);
    ucs_error("Failed to activate interface");
    return status;
}
/*
 * Helper that brings up the SMSG connection of a VC.
 *
 * Fills the local SMSG attributes from the VC's mailbox and the domain
 * parameters, then, with the NIC lock held, creates the GNI endpoint on the
 * NIC's tx CQ, binds it to the peer address, initializes SMSG against the
 * peer's attributes and sets the endpoint's event data (vc_id, peer_id).
 *
 * Returns FI_SUCCESS, -FI_EINVAL when the endpoint has no domain, or the
 * fabric errno translated from the failing GNI call. If a step after the
 * endpoint creation fails, the endpoint is destroyed before returning.
 */
static int __gnix_vc_smsg_init(struct gnix_vc *vc, int peer_id,
			       gni_smsg_attr_t *peer_smsg_attr)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_ep *ep;
	struct gnix_fid_domain *dom;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t local_smsg_attr;
	gni_return_t __attribute__((unused)) status;
	ssize_t __attribute__((unused)) len;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	assert(vc);

	ep = vc->ep;
	assert(ep);

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	mbox = vc->smsg_mbox;
	assert (mbox);

	/* describe the local mailbox to GNI */
	local_smsg_attr.msg_type       = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	local_smsg_attr.msg_buffer     = mbox->base;
	local_smsg_attr.buff_size      = vc->ep->nic->mem_per_mbox;
	local_smsg_attr.mem_hndl       = *mbox->memory_handle;
	local_smsg_attr.mbox_offset    = (uint64_t)mbox->offset;
	local_smsg_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	local_smsg_attr.msg_maxsize    = dom->params.mbox_msg_maxsize;

	/*
	 * now build the SMSG connection
	 */
	fastlock_acquire(&ep->nic->lock);

	status = GNI_EpCreate(ep->nic->gni_nic_hndl, ep->nic->tx_cq,
			      &vc->gni_ep);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpCreate returned %s\n", gni_err_str[status]);
		/* no endpoint exists yet, so there is nothing to destroy */
		ret = gnixu_to_fi_errno(status);
		goto unlock;
	}

	status = GNI_EpBind(vc->gni_ep, vc->peer_addr.device_addr,
			    vc->peer_addr.cdm_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpBind returned %s\n", gni_err_str[status]);
		goto destroy_ep;
	}

	status = GNI_SmsgInit(vc->gni_ep, &local_smsg_attr, peer_smsg_attr);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_SmsgInit returned %s\n", gni_err_str[status]);
		goto destroy_ep;
	}

	status = GNI_EpSetEventData(vc->gni_ep, vc->vc_id, peer_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpSetEventData returned %s\n",
			  gni_err_str[status]);
		goto destroy_ep;
	}

	/* success: ret is still FI_SUCCESS */
	goto unlock;

destroy_ep:
	ret = gnixu_to_fi_errno(status);
	GNI_EpDestroy(vc->gni_ep);
unlock:
	fastlock_release(&ep->nic->lock);
	return ret;
}