/**
 * Bind a uGNI endpoint handle to a remote peer.
 *
 * @param ep             Endpoint whose GNI handle (ep->ep) is bound.
 * @param iface          Interface whose CDM lock guards the GNI calls.
 * @param iface_addr     Remote interface address; its domain_id is passed to
 *                       GNI_EpBind.
 * @param ugni_dev_addr  Remote device address; its nic_addr is passed to
 *                       GNI_EpBind.
 *
 * @return UCS_OK when the bind succeeds (the endpoint's flush counter is then
 *         reset to UCT_UGNI_INIT_FLUSH), or UCS_ERR_UNREACHABLE when
 *         GNI_EpBind fails. On failure the GNI endpoint handle is destroyed
 *         before returning.
 */
ucs_status_t ugni_connect_ep(uct_ugni_ep_t *ep,
                             uct_ugni_iface_t *iface,
                             const uct_sockaddr_ugni_t *iface_addr,
                             const uct_devaddr_ugni_t *ugni_dev_addr)
{
    gni_return_t bind_rc;

    uct_ugni_cdm_lock(&iface->cdm);
    bind_rc = GNI_EpBind(ep->ep, ugni_dev_addr->nic_addr, iface_addr->domain_id);
    uct_ugni_cdm_unlock(&iface->cdm);

    if (bind_rc == GNI_RC_SUCCESS) {
        ucs_debug("Binding ep %p to address (%d %d)", ep, ugni_dev_addr->nic_addr,
                  iface_addr->domain_id);

        ep->flush_group->flush_comp.count = UCT_UGNI_INIT_FLUSH;
        return UCS_OK;
    }

    /* Bind failed: tear down the GNI handle (result deliberately ignored),
     * then report the bind error. */
    uct_ugni_cdm_lock(&iface->cdm);
    (void)GNI_EpDestroy(ep->ep);
    uct_ugni_cdm_unlock(&iface->cdm);

    ucs_error("GNI_EpBind failed, Error status: %s %d",
              gni_err_str[bind_rc], bind_rc);
    return UCS_ERR_UNREACHABLE;
}
/* Endpoint definition */

/**
 * Initialize a base uGNI endpoint.
 *
 * Initializes the uct_base_ep_t superclass, takes a flush group from the
 * interface, creates the GNI endpoint handle on the interface's NIC and
 * local CQ, derives the endpoint hash key from the handle and registers
 * the endpoint in the interface's endpoint hash.
 *
 * @param params  Endpoint parameters; params->iface must be a
 *                uct_ugni_iface_t.
 *
 * @return UCS_OK on success, UCS_ERR_NO_DEVICE if GNI_EpCreate fails (the
 *         flush group taken above is returned before bailing out), or the
 *         superclass init status if that fails.
 */
UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, const uct_ep_params_t *params)
{
    uct_ugni_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_iface_t);
    gni_return_t ugni_rc;
    uint32_t *big_hash;

    self->arb_sched = 0;
    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);
    self->flush_group = uct_ugni_new_flush_group(iface);
#ifdef DEBUG
    self->flush_group->flush_comp.func = NULL;
    self->flush_group->parent = NULL;
#endif
    uct_ugni_cdm_lock(&iface->cdm);
    ugni_rc = GNI_EpCreate(uct_ugni_iface_nic_handle(iface), iface->local_cq,
                           &self->ep);
    uct_ugni_cdm_unlock(&iface->cdm);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        /* This class's cleanup does not run when its init fails, so the
         * flush group must be handed back here to avoid leaking it. */
        uct_ugni_put_flush_group(self->flush_group);
        return UCS_ERR_NO_DEVICE;
    }
    ucs_arbiter_group_init(&self->arb_group);

    /* The hash key is the first 32 bits of the GNI endpoint handle value. */
    big_hash = (void *)&self->ep;
    self->hash_key = big_hash[0];
    if (uct_ugni_check_device_type(iface, GNI_DEVICE_ARIES)) {
        /* Keep only the low 24 bits of the key on Aries devices. */
        self->hash_key &= 0x00FFFFFF;
    }
    ucs_debug("Adding ep hash %x to iface %p", self->hash_key, iface);
    sglib_hashed_uct_ugni_ep_t_add(iface->eps, self);

    return UCS_OK;
}
/**
 * Register the memory that immediately follows an SMSG mailbox descriptor
 * with the NIC.
 *
 * @param iface  SMSG interface; supplies the NIC handle, the mailbox size
 *               (bytes_per_mbox) and the remote CQ used for registration.
 * @param mbox   Mailbox descriptor. The registered region starts right after
 *               this structure; on success gni_mem and base_address are set.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM when the mailbox size is
 *         zero, UCS_ERR_IO_ERROR when GNI_MemRegister fails.
 */
static ucs_status_t uct_ugni_smsg_mbox_reg(uct_ugni_smsg_iface_t *iface,
                                           uct_ugni_smsg_mbox_t *mbox)
{
    void *address = (mbox + 1);
    gni_return_t reg_rc;

    if (iface->bytes_per_mbox == 0) {
        ucs_error("Unexpected length %zu", iface->bytes_per_mbox);
        return UCS_ERR_INVALID_PARAM;
    }

    uct_ugni_cdm_lock(&iface->super.cdm);
    reg_rc = GNI_MemRegister(uct_ugni_iface_nic_handle(&iface->super),
                             (uint64_t)address,
                             iface->bytes_per_mbox,
                             iface->remote_cq,
                             GNI_MEM_READWRITE,
                             -1,
                             &(mbox->gni_mem));
    uct_ugni_cdm_unlock(&iface->super.cdm);

    if (reg_rc == GNI_RC_SUCCESS) {
        mbox->base_address = (uintptr_t)address;
        return UCS_OK;
    }

    ucs_error("GNI_MemRegister failed (addr %p, size %zu), Error status: %s %d",
              address, iface->bytes_per_mbox, gni_err_str[reg_rc], reg_rc);
    return UCS_ERR_IO_ERROR;
}
/**
 * Common send path for SMSG active messages.
 *
 * Assigns the next interface message id to the descriptor, sends header and
 * payload with GNI_SmsgSendWTag (tagged with the AM id) and, on success,
 * accounts for the outstanding operation and tracks the descriptor in the
 * interface's smsg list.
 *
 * @param ep              Sending endpoint.
 * @param iface           SMSG interface owning the endpoint.
 * @param am_id           Active message id, used as the SMSG tag.
 * @param header_length   Header size in bytes.
 * @param header          Header buffer.
 * @param payload_length  Payload size in bytes.
 * @param payload         Payload buffer.
 * @param desc            Send descriptor; returned to its memory pool on
 *                        failure.
 *
 * @return UCS_OK on success; UCS_ERR_NO_RESOURCE when the endpoint cannot
 *         send or the GNI send call does not succeed.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_ugni_smsg_ep_am_common_send(uct_ugni_smsg_ep_t *ep, uct_ugni_smsg_iface_t *iface,
                                uint8_t am_id, unsigned header_length, void *header,
                                unsigned payload_length, void *payload,
                                uct_ugni_smsg_desc_t *desc)
{
    gni_return_t send_rc;

    if (ucs_unlikely(!uct_ugni_ep_can_send(&ep->super))) {
        goto out_no_resource;
    }

    desc->msg_id      = iface->smsg_id++;
    desc->flush_group = ep->super.flush_group;

    uct_ugni_cdm_lock(&iface->super.cdm);
    send_rc = GNI_SmsgSendWTag(ep->super.ep,
                               header, header_length,
                               payload, payload_length,
                               desc->msg_id, am_id);
    uct_ugni_cdm_unlock(&iface->super.cdm);

    if (send_rc != GNI_RC_SUCCESS) {
        goto out_no_resource;
    }

    /* One more operation in flight for both the flush group and the iface. */
    desc->flush_group->flush_comp.count++;
    iface->super.outstanding++;

    sglib_hashed_uct_ugni_smsg_desc_t_add(iface->smsg_list, desc);
    return UCS_OK;

out_no_resource:
    ucs_trace("Smsg send failed.");
    ucs_mpool_put(desc);
    UCS_STATS_UPDATE_COUNTER(ep->super.super.stats, UCT_EP_STAT_NO_RES, 1);
    return UCS_ERR_NO_RESOURCE;
}
/**
 * Post an RDMA descriptor on an endpoint.
 *
 * @param iface  RDMA interface; its CDM lock guards the GNI call and its
 *               outstanding counter is bumped on success.
 * @param ep     Endpoint to post on.
 * @param rdma   Descriptor to post. It is returned to its memory pool on
 *               every failure path.
 *
 * @return UCS_INPROGRESS when the post was accepted;
 *         UCS_ERR_NO_RESOURCE when the endpoint cannot send or GNI reports
 *         GNI_RC_ERROR_RESOURCE / GNI_RC_ERROR_NOMEM;
 *         UCS_ERR_IO_ERROR for any other GNI failure.
 */
static inline ucs_status_t uct_ugni_post_rdma(uct_ugni_rdma_iface_t *iface,
                                              uct_ugni_ep_t *ep,
                                              uct_ugni_base_desc_t *rdma)
{
    gni_return_t post_rc;

    if (ucs_unlikely(!uct_ugni_ep_can_send(ep))) {
        ucs_mpool_put(rdma);
        return UCS_ERR_NO_RESOURCE;
    }

    uct_ugni_cdm_lock(&iface->super.cdm);
    post_rc = GNI_PostRdma(ep->ep, &rdma->desc);
    uct_ugni_cdm_unlock(&iface->super.cdm);

    if (ucs_unlikely(GNI_RC_SUCCESS != post_rc)) {
        ucs_mpool_put(rdma);

        switch (post_rc) {
        case GNI_RC_ERROR_RESOURCE:
        case GNI_RC_ERROR_NOMEM:
            /* Resource shortage is only logged at debug level. */
            ucs_debug("GNI_PostRdma failed, Error status: %s %d",
                      gni_err_str[post_rc], post_rc);
            return UCS_ERR_NO_RESOURCE;
        default:
            ucs_error("GNI_PostRdma failed, Error status: %s %d",
                      gni_err_str[post_rc], post_rc);
            return UCS_ERR_IO_ERROR;
        }
    }

    /* Account for the newly posted operation. */
    rdma->flush_group->flush_comp.count++;
    iface->super.outstanding++;

    return UCS_INPROGRESS;
}
/**
 * Connect an SMSG endpoint to a remote SMSG endpoint.
 *
 * Expands the compact SMSG attributes carried in the remote endpoint
 * address, binds the local GNI endpoint to the remote peer, initializes
 * SMSG on it and sets the endpoint's event data (local domain id and the
 * remote endpoint hash).
 *
 * @param tl_ep     Local endpoint (a uct_ugni_smsg_ep_t).
 * @param dev_addr  Remote device address (a uct_devaddr_ugni_t).
 * @param ep_addr   Remote endpoint address (a uct_sockaddr_smsg_ugni_t).
 *
 * @return UCS_OK on success; the error from ugni_connect_ep() if binding
 *         fails; UCS_ERR_INVALID_PARAM or UCS_ERR_NO_MEMORY if GNI_SmsgInit
 *         fails. A GNI_EpSetEventData failure is logged but not returned.
 */
ucs_status_t uct_ugni_smsg_ep_connect_to_ep(uct_ep_h tl_ep,
                                            const uct_device_addr_t *dev_addr,
                                            const uct_ep_addr_t *ep_addr)
{
    uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t);
    uct_ugni_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_iface_t);
    const uct_sockaddr_smsg_ugni_t *iface_addr = (const uct_sockaddr_smsg_ugni_t*)ep_addr;
    const uct_devaddr_ugni_t *ugni_dev_addr = (const uct_devaddr_ugni_t *)dev_addr;
    gni_smsg_attr_t *local_attr = (gni_smsg_attr_t*)&ep->smsg_attr->mbox_attr;
    uct_ugni_compact_smsg_attr_t *compact_remote_attr =
        (uct_ugni_compact_smsg_attr_t *)&iface_addr->smsg_compact_attr;
    gni_smsg_attr_t remote_attr;
    gni_return_t gni_rc;
    ucs_status_t rc = UCS_OK;
    uint32_t ep_hash;

    uncompact_smsg_attr(ucs_derived_of(iface, uct_ugni_smsg_iface_t),
                        compact_remote_attr, &remote_attr);

    /* Argument order must follow ugni_connect_ep(ep, iface, iface_addr,
     * ugni_dev_addr). */
    rc = ugni_connect_ep(&ep->super, iface, &iface_addr->super, ugni_dev_addr);
    if (UCS_OK != rc) {
        ucs_error("Could not connect ep in smsg");
        return rc;
    }

    uct_ugni_cdm_lock(&iface->cdm);
    gni_rc = GNI_SmsgInit(ep->super.ep, local_attr, &remote_attr);
    uct_ugni_cdm_unlock(&iface->cdm);
    if (GNI_RC_SUCCESS != gni_rc) {
        ucs_error("Failed to initalize smsg. %s [%i]", gni_err_str[gni_rc], gni_rc);
        if (GNI_RC_INVALID_PARAM == gni_rc) {
            return UCS_ERR_INVALID_PARAM;
        } else {
            return UCS_ERR_NO_MEMORY;
        }
    }

    ep_hash = (uint32_t)iface_addr->ep_hash;
    uct_ugni_cdm_lock(&iface->cdm);
    gni_rc = GNI_EpSetEventData(ep->super.ep, iface->cdm.domain_id, ep_hash);
    uct_ugni_cdm_unlock(&iface->cdm);
    if (GNI_RC_SUCCESS != gni_rc) {
        /* Logged only; the connect still reports success to the caller. */
        ucs_error("Could not set GNI_EpSetEventData!");
    }

    return rc;
}
/**
 * Deregister an SMSG mailbox's memory handle from the NIC.
 *
 * @param iface  SMSG interface supplying the NIC handle and CDM lock.
 * @param mbox   Mailbox whose gni_mem handle is deregistered.
 *
 * @return UCS_OK on success, UCS_ERR_IO_ERROR if GNI_MemDeregister fails.
 */
static ucs_status_t uct_ugni_smsg_mbox_dereg(uct_ugni_smsg_iface_t *iface,
                                             uct_ugni_smsg_mbox_t *mbox)
{
    gni_return_t dereg_rc;

    uct_ugni_cdm_lock(&iface->super.cdm);
    dereg_rc = GNI_MemDeregister(uct_ugni_iface_nic_handle(&iface->super),
                                 &mbox->gni_mem);
    uct_ugni_cdm_unlock(&iface->super.cdm);

    if (dereg_rc == GNI_RC_SUCCESS) {
        return UCS_OK;
    }

    ucs_error("GNI_MemDeregister failed Error status: %s %d",
              gni_err_str[dereg_rc], dereg_rc);
    return UCS_ERR_IO_ERROR;
}
static UCS_CLASS_CLEANUP_FUNC(uct_ugni_ep_t) { uct_ugni_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_ugni_iface_t); gni_return_t ugni_rc; ucs_debug("Removinig ep hash %x from iface %p", self->hash_key, iface); ucs_arbiter_group_purge(&iface->arbiter, &self->arb_group, uct_ugni_ep_abriter_purge_cb, NULL); uct_ugni_cdm_lock(&iface->cdm); ugni_rc = GNI_EpDestroy(self->ep); uct_ugni_cdm_unlock(&iface->cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_warn("GNI_EpDestroy failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); } sglib_hashed_uct_ugni_ep_t_delete(iface->eps, self); uct_ugni_ep_pending_purge(&self->super.super, NULL, NULL); uct_ugni_put_flush_group(self->flush_group); }