static ucs_status_t ucp_wireup_conn_ack_handler(void *arg, void *data, size_t length, void *desc) { ucp_worker_h worker = arg; ucp_wireup_msg_t *msg = data; ucp_ep_h ep; UCS_ASYNC_BLOCK(&worker->async); ucp_wireup_log(worker, UCP_AM_ID_CONN_ACK, msg, 0); ep = ucp_worker_find_ep(worker, msg->src_uuid); if (ep == NULL) { ucs_debug("ignoring connection request - not exists"); goto out; } if (ep->state & UCP_EP_STATE_REMOTE_CONNECTED) { ucs_debug("ignoring conn_ack - remote already connected"); goto out; } /* * If we got CONN_ACK, it means remote side got our reply, and also * connected to us. */ ucp_ep_remote_connected(ep); out: UCS_ASYNC_UNBLOCK(&worker->async); return UCS_OK; }
static ucs_status_t uct_gdr_copy_query_md_resources(uct_md_resource_desc_t **resources_p, unsigned *num_resources_p) { int num_gpus; gdr_t ctx; cudaError_t cudaErr; cudaErr = cudaGetDeviceCount(&num_gpus); if ((cudaErr != cudaSuccess) || (num_gpus == 0)) { ucs_debug("not found cuda devices"); *resources_p = NULL; *num_resources_p = 0; return UCS_OK; } ctx = gdr_open(); if (ctx == NULL) { ucs_debug("could not open gdr copy. disabling gdr copy resource"); *resources_p = NULL; *num_resources_p = 0; return UCS_OK; } gdr_close(ctx); return uct_single_md_resource(&uct_gdr_copy_md_component, resources_p, num_resources_p); }
/* send a signal to remote interface using Unix-domain socket */ static ucs_status_t uct_mm_ep_signal_remote(uct_mm_ep_t *ep, uct_mm_iface_conn_signal_t sig) { uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t); int ret; /** * Send connect message to remote interface */ ret = sendto(iface->signal_fd, &sig, sizeof(sig), 0, (const struct sockaddr*)&ep->cached_signal_sockaddr, ep->cached_signal_addrlen); if (ret >= 0) { ucs_assert(ret == sizeof(sig)); ucs_debug("Sent connect from socket %d to %p", iface->signal_fd, (const struct sockaddr*)&ep->cached_signal_sockaddr); if (ep->cbq_elem_on) { uct_mm_ep_remove_slow_path_callback(iface, ep); } /* point the ep->fifo_ctl to the remote fifo */ uct_mm_ep_connected(ep); return UCS_OK; } else if (errno == EAGAIN) { /* If sending a signal has failed, retry. * Note that by default the receiver might have a limited backlog, * on Linux systems it is net.unix.max_dgram_qlen (10 by default). */ ucs_debug("Failed to send connect from socket %d to %p", iface->signal_fd, (const struct sockaddr*)&ep->cached_signal_sockaddr); /* If sending the Connect message failed with EAGAIN, try again later. * Don't keep trying now in a loop since this may cause a deadlock which * prevents the reading of incoming messages which blocks the remote sender. * Add the sending attempt as a callback to a slow progress. */ if ((!ep->cbq_elem_on) && (sig == UCT_MM_IFACE_SIGNAL_CONNECT)) { ep->cbq_elem.cb = uct_mm_ep_signal_remote_slow_path_callback; uct_worker_slowpath_progress_register(iface->super.worker, &ep->cbq_elem); ep->cbq_elem_on = 1; } /* Return UCS_OK in this case even though couldn't send, so that the * calling flow would release the lock and allow the reading of incoming * Connect messages. */ return UCS_OK; } else { if (errno == ECONNREFUSED) { ucs_debug("failed to send connect signal: connection refused"); } else { ucs_error("failed to send connect signal: %m"); } return UCS_ERR_IO_ERROR; } }
static ucs_status_t ucp_wireup_conn_req_handler(void *arg, void *data, size_t length, void *desc) { ucp_worker_h worker = arg; ucp_wireup_msg_t *msg = data; ucs_status_t status; ucp_ep_h ep; UCS_ASYNC_BLOCK(&worker->async); ucp_wireup_log(worker, UCP_AM_ID_CONN_REQ, msg, 0); ep = ucp_worker_find_ep(worker, msg->src_uuid); if (ep == NULL) { ucs_debug("ignoring connection request - not exists"); goto out; } if (ep->state & UCP_EP_STATE_LOCAL_CONNECTED) { ucs_debug("ignoring connection request - already connected"); /* TODO allow active-passive connection establishment */ goto out; } if (ep->uct.rsc_index != msg->dst_rsc_index) { ucs_error("got connection request on a different resource (got: %d, expected: %d)", msg->dst_rsc_index, ep->uct.rsc_index); /* TODO send reject, and use different transport */ goto out; } status = uct_ep_connect_to_ep(ep->uct.next_ep, (struct sockaddr *)(msg + 1)); if (status != UCS_OK) { ucs_debug("failed to connect"); /* TODO send reject */ goto out; } ep->state |= UCP_EP_STATE_LOCAL_CONNECTED; status = ucp_ep_wireup_send(ep, ep->wireup_ep, UCP_AM_ID_CONN_REP, msg->src_rsc_index); if (status != UCS_OK) { goto out; } ep->state |= UCP_EP_STATE_CONN_REP_SENT; out: UCS_ASYNC_UNBLOCK(&worker->async); return UCS_OK; }
ucs_status_t ucp_mem_map(ucp_context_h context, void **address_p, size_t length, unsigned flags, ucp_mem_h *memh_p) { ucs_status_t status; ucp_mem_h memh; if (length == 0) { ucs_debug("mapping zero length buffer, return dummy memh"); *memh_p = &ucp_mem_dummy_handle; return UCS_OK; } /* Allocate the memory handle */ ucs_assert(context->num_pds > 0); memh = ucs_malloc(sizeof(*memh) + context->num_pds * sizeof(memh->uct[0]), "ucp_memh"); if (memh == NULL) { status = UCS_ERR_NO_MEMORY; goto err; } if (*address_p == NULL) { status = ucp_mem_alloc(context, length, "user allocation", memh); if (status != UCS_OK) { goto err_free_memh; } *address_p = memh->address; } else { ucs_debug("registering user memory at %p length %zu", *address_p, length); memh->address = *address_p; memh->length = length; memh->alloc_method = UCT_ALLOC_METHOD_LAST; memh->alloc_pd = NULL; status = ucp_memh_reg_pds(context, memh, UCT_INVALID_MEM_HANDLE); if (status != UCS_OK) { goto err_free_memh; } } ucs_debug("%s buffer %p length %zu memh %p pd_map 0x%"PRIx64, (memh->alloc_method == UCT_ALLOC_METHOD_LAST) ? "mapped" : "allocated", memh->address, memh->length, memh, memh->pd_map); *memh_p = memh; return UCS_OK; err_free_memh: ucs_free(memh); err: return status; }
ucs_status_t uct_ud_ep_connect_to_ep(uct_ud_ep_t *ep, const struct sockaddr *addr) { uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); uct_ib_device_t *dev = uct_ib_iface_device(&iface->super); const uct_sockaddr_ib_t *ib_addr = (uct_sockaddr_ib_t *)addr; ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID); ucs_trace_func(""); ep->dest_ep_id = ib_addr->id; ep->dest_qpn = ib_addr->qp_num; uct_ud_ep_reset(ep); ucs_debug("%s:%d slid=%d qpn=%d ep=%u connected to dlid=%d qpn=%d ep=%u", ibv_get_device_name(dev->ibv_context->device), iface->super.port_num, dev->port_attr[iface->super.port_num-dev->first_port].lid, iface->qp->qp_num, ep->ep_id, ib_addr->lid, ep->dest_qpn, ep->dest_ep_id); return UCS_OK; }
static UCS_CLASS_CLEANUP_FUNC(uct_rdmacm_ep_t) { uct_rdmacm_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_rdmacm_iface_t); ucs_debug("rdmacm_ep %p: destroying", self); UCS_ASYNC_BLOCK(iface->super.worker->async); if (self->is_on_pending) { ucs_list_del(&self->list_elem); self->is_on_pending = 0; } /* remove the slow progress function in case it was placed on the slow progress * chain but wasn't invoked yet */ uct_worker_progress_unregister_safe(&iface->super.worker->super, &self->slow_prog_id); /* if the destroyed ep is the active one on the iface, mark it as destroyed * so that arriving events on the iface won't try to access this ep */ if (iface->ep == self) { iface->ep = UCT_RDMACM_IFACE_BLOCKED_NO_EP; } UCS_ASYNC_UNBLOCK(iface->super.worker->async); ucs_free(self->priv_data); }
static inline ssize_t uct_ugni_post_fma(uct_ugni_rdma_iface_t *iface, uct_ugni_ep_t *ep, uct_ugni_base_desc_t *fma, ssize_t ok_status) { gni_return_t ugni_rc; if (ucs_unlikely(!uct_ugni_ep_can_send(ep))) { ucs_mpool_put(fma); return UCS_ERR_NO_RESOURCE; } uct_ugni_device_lock(&iface->super.cdm); ugni_rc = GNI_PostFma(ep->ep, &fma->desc); uct_ugni_device_unlock(&iface->super.cdm); if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { ucs_mpool_put(fma); if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) { ucs_debug("GNI_PostFma failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_NO_RESOURCE; } else { ucs_error("GNI_PostFma failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_IO_ERROR; } } ++fma->flush_group->flush_comp.count; ++iface->super.outstanding; return ok_status; }
void uct_dc_mlx5_ep_release(uct_dc_mlx5_ep_t *ep) { ucs_assert_always(!(ep->flags & UCT_DC_MLX5_EP_FLAG_VALID)); ucs_debug("release dc_mlx5_ep %p", ep); ucs_list_del(&ep->list); ucs_free(ep); }
static UCS_CLASS_CLEANUP_FUNC(uct_dc_mlx5_ep_t) { uct_dc_mlx5_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_pending_purge(&self->super.super, NULL, NULL); ucs_arbiter_group_cleanup(&self->arb_group); uct_rc_fc_cleanup(&self->fc); ucs_assert_always(self->flags & UCT_DC_MLX5_EP_FLAG_VALID); if ((self->dci == UCT_DC_MLX5_EP_NO_DCI) || uct_dc_mlx5_iface_is_dci_rand(iface)) { return; } /* TODO: this is good for dcs policy only. * Need to change if eps share dci */ ucs_assertv_always(uct_dc_mlx5_iface_dci_has_outstanding(iface, self->dci), "iface (%p) ep (%p) dci leak detected: dci=%d", iface, self, self->dci); /* we can handle it but well behaving app should not do this */ ucs_debug("ep (%p) is destroyed with %d outstanding ops", self, (int16_t)iface->super.super.config.tx_qp_len - uct_rc_txqp_available(&iface->tx.dcis[self->dci].txqp)); uct_rc_txqp_purge_outstanding(&iface->tx.dcis[self->dci].txqp, UCS_ERR_CANCELED, 1); iface->tx.dcis[self->dci].ep = NULL; #if ENABLE_ASSERT iface->tx.dcis[self->dci].flags |= UCT_DC_DCI_FLAG_EP_DESTROYED; #endif }
ucs_status_t ugni_activate_iface(uct_ugni_iface_t *iface) { ucs_status_t status; gni_return_t ugni_rc; uint32_t pe_address; if(iface->activated) { return UCS_OK; } status = uct_ugni_init_nic(0, &iface->domain_id, &iface->cdm_handle, &iface->nic_handle, &pe_address); if (UCS_OK != status) { ucs_error("Failed to UGNI NIC, Error status: %d", status); return status; } ucs_debug("Made ugni interface. iface->dev->nic_addr = %i iface->domain_id = %i", iface->dev->address, iface->domain_id); ugni_rc = GNI_CqCreate(iface->nic_handle, UCT_UGNI_LOCAL_CQ, 0, GNI_CQ_NOBLOCK, NULL, NULL, &iface->local_cq); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_CqCreate failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_NO_DEVICE; } iface->activated = true; /* iface is activated */ return UCS_OK; }
static ucs_status_t ucp_ep_new(ucp_worker_h worker, uint64_t dest_uuid, const char *peer_name, const char *message, ucp_ep_h *ep_p) { ucp_ep_h ep; ep = ucs_calloc(1, sizeof(*ep), "ucp ep"); if (ep == NULL) { ucs_error("Failed to allocate ep"); return UCS_ERR_NO_MEMORY; } ep->worker = worker; ep->dest_uuid = dest_uuid; ep->rma_dst_pdi = UCP_NULL_RESOURCE; ep->amo_dst_pdi = UCP_NULL_RESOURCE; ep->cfg_index = 0; ep->flags = 0; #if ENABLE_DEBUG_DATA ucs_snprintf_zero(ep->peer_name, UCP_WORKER_NAME_MAX, "%s", peer_name); #endif sglib_hashed_ucp_ep_t_add(worker->ep_hash, ep); *ep_p = ep; ucs_debug("created ep %p to %s 0x%"PRIx64"->0x%"PRIx64" %s", ep, peer_name, worker->uuid, ep->dest_uuid, message); return UCS_OK; }
/* add new handler to the table */ static ucs_status_t ucs_async_handler_add(ucs_async_handler_t *handler) { int hash_extra_status; ucs_status_t status; khiter_t hash_it; pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock); ucs_assert_always(handler->refcount == 1); hash_it = kh_put(ucs_async_handler, &ucs_async_global_context.handlers, handler->id, &hash_extra_status); if (hash_extra_status == -1) { ucs_error("Failed to add async handler " UCS_ASYNC_HANDLER_FMT " to hash", UCS_ASYNC_HANDLER_ARG(handler)); status = UCS_ERR_NO_MEMORY; goto out_unlock; } else if (hash_extra_status == 0) { ucs_error("Async handler " UCS_ASYNC_HANDLER_FMT " exists - cannot add %s()", UCS_ASYNC_HANDLER_ARG(kh_value(&ucs_async_global_context.handlers, hash_it)), ucs_debug_get_symbol_name(handler->cb)); status = UCS_ERR_ALREADY_EXISTS; goto out_unlock; } ucs_assert_always(!ucs_async_handler_kh_is_end(hash_it)); kh_value(&ucs_async_global_context.handlers, hash_it) = handler; ucs_debug("added async handler " UCS_ASYNC_HANDLER_FMT " to hash", UCS_ASYNC_HANDLER_ARG(handler)); status = UCS_OK; out_unlock: pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock); return status; }
static UCS_F_ALWAYS_INLINE void ucp_tag_recv_request_init(ucp_request_t *req, ucp_worker_h worker, void* buffer, size_t count, ucp_datatype_t datatype, uint16_t req_flags) { ucp_dt_generic_t *dt_gen; req->flags = UCP_REQUEST_FLAG_EXPECTED | UCP_REQUEST_FLAG_RECV | req_flags; req->recv.state.offset = 0; req->recv.worker = worker; switch (datatype & UCP_DATATYPE_CLASS_MASK) { case UCP_DATATYPE_IOV: req->recv.state.dt.iov.iov_offset = 0; req->recv.state.dt.iov.iovcnt_offset = 0; req->recv.state.dt.iov.iovcnt = count; req->recv.state.dt.iov.memh = UCT_MEM_HANDLE_NULL; break; case UCP_DATATYPE_GENERIC: dt_gen = ucp_dt_generic(datatype); req->recv.state.dt.generic.state = UCS_PROFILE_NAMED_CALL("dt_start", dt_gen->ops.start_unpack, dt_gen->context, buffer, count); ucs_debug("req %p buffer %p count %zu dt_gen state=%p", req, buffer, count, req->recv.state.dt.generic.state); break; default: break; } if (ucs_log_enabled(UCS_LOG_LEVEL_TRACE_REQ)) { req->recv.info.sender_tag = 0; } }
ucs_status_t ucp_mem_unmap(ucp_context_h context, ucp_mem_h memh) { uct_allocated_memory_t mem; uct_mem_h alloc_pd_memh; ucs_status_t status; ucs_debug("unmapping buffer %p memh %p", memh->address, memh); if (memh == &ucp_mem_dummy_handle) { return UCS_OK; } /* Unregister from all protection domains */ status = ucp_memh_dereg_pds(context, memh, &alloc_pd_memh); if (status != UCS_OK) { return status; } /* If the memory was also allocated, release it */ if (memh->alloc_method != UCT_ALLOC_METHOD_LAST) { mem.address = memh->address; mem.length = memh->length; mem.method = memh->alloc_method; mem.pd = memh->alloc_pd; /* May be NULL if method is not PD */ mem.memh = alloc_pd_memh; /* May be INVALID if method is not PD */ status = uct_mem_free(&mem); if (status != UCS_OK) { return status; } } ucs_free(memh); return UCS_OK; }
/* Endpoint definition */ UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, const uct_ep_params_t *params) { uct_ugni_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_iface_t); ucs_status_t rc = UCS_OK; gni_return_t ugni_rc; uint32_t *big_hash; self->arb_sched = 0; UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); self->flush_group = uct_ugni_new_flush_group(iface); #ifdef DEBUG self->flush_group->flush_comp.func = NULL; self->flush_group->parent = NULL; #endif uct_ugni_cdm_lock(&iface->cdm); ugni_rc = GNI_EpCreate(uct_ugni_iface_nic_handle(iface), iface->local_cq, &self->ep); uct_ugni_cdm_unlock(&iface->cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_CdmCreate failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_NO_DEVICE; } ucs_arbiter_group_init(&self->arb_group); big_hash = (void *)&self->ep; self->hash_key = big_hash[0]; if (uct_ugni_check_device_type(iface, GNI_DEVICE_ARIES)) { self->hash_key &= 0x00FFFFFF; } ucs_debug("Adding ep hash %x to iface %p", self->hash_key, iface); sglib_hashed_uct_ugni_ep_t_add(iface->eps, self); return rc; }
ucs_status_t ugni_connect_ep(uct_ugni_ep_t *ep, uct_ugni_iface_t *iface, const uct_sockaddr_ugni_t *iface_addr, const uct_devaddr_ugni_t *ugni_dev_addr) { gni_return_t ugni_rc; uct_ugni_cdm_lock(&iface->cdm); ugni_rc = GNI_EpBind(ep->ep, ugni_dev_addr->nic_addr, iface_addr->domain_id); uct_ugni_cdm_unlock(&iface->cdm); if (GNI_RC_SUCCESS != ugni_rc) { uct_ugni_cdm_lock(&iface->cdm); (void)GNI_EpDestroy(ep->ep); uct_ugni_cdm_unlock(&iface->cdm); ucs_error("GNI_EpBind failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_UNREACHABLE; } ucs_debug("Binding ep %p to address (%d %d)", ep, ugni_dev_addr->nic_addr, iface_addr->domain_id); ep->flush_group->flush_comp.count = UCT_UGNI_INIT_FLUSH; return UCS_OK; }
static ucs_status_t uct_ugni_rkey_unpack(uct_md_component_t *mdc, const void *rkey_buffer, uct_rkey_t *rkey_p, void **handle_p) { const uint64_t *ptr = rkey_buffer; gni_mem_handle_t *mem_hndl = NULL; uint64_t magic = 0; ucs_debug("Unpacking [ %"PRIx64" %"PRIx64" %"PRIx64"]", ptr[0], ptr[1], ptr[2]); magic = ptr[0]; if (magic != UCT_UGNI_RKEY_MAGIC) { ucs_error("Failed to identify key. Expected %llx but received %"PRIx64"", UCT_UGNI_RKEY_MAGIC, magic); return UCS_ERR_UNSUPPORTED; } mem_hndl = ucs_malloc(sizeof(gni_mem_handle_t), "gni_mem_handle_t"); if (NULL == mem_hndl) { ucs_error("Failed to allocate memory for gni_mem_handle_t"); return UCS_ERR_NO_MEMORY; } mem_hndl->qword1 = ptr[1]; mem_hndl->qword2 = ptr[2]; *rkey_p = (uintptr_t)mem_hndl; *handle_p = NULL; return UCS_OK; }
static void uct_ud_ep_rx_creq(uct_ud_iface_t *iface, uct_ud_neth_t *neth) { uct_ud_ep_t *ep; uct_ud_ctl_hdr_t *ctl = (uct_ud_ctl_hdr_t *)(neth + 1); ucs_assert_always(ctl->type == UCT_UD_PACKET_CREQ); ep = uct_ud_iface_cep_lookup(iface, &ctl->conn_req.ib_addr, ctl->conn_req.conn_id); if (!ep) { ep = uct_ud_ep_create_passive(iface, ctl); ucs_assert_always(ep != NULL); ep->rx.ooo_pkts.head_sn = neth->psn; } else { if (ep->dest_ep_id == UCT_UD_EP_NULL_ID) { /* simultaniuos CREQ */ ep->dest_ep_id = ctl->conn_req.ib_addr.id; ep->rx.ooo_pkts.head_sn = neth->psn; ucs_debug("created ep=%p (iface=%p conn_id=%d ep_id=%d, dest_ep_id=%d rx_psn=%u)", ep, iface, ep->conn_id, ep->ep_id, ep->dest_ep_id, ep->rx.ooo_pkts.head_sn); } } ucs_assert_always(ctl->conn_req.conn_id == ep->conn_id); ucs_assert_always(ctl->conn_req.ib_addr.id == ep->dest_ep_id); /* creq must always have same psn */ ucs_assert_always(ep->rx.ooo_pkts.head_sn == neth->psn); /* scedule connection reply op */ UCT_UD_EP_HOOK_CALL_RX(ep, neth); uct_ud_iface_queue_pending(iface, ep, UCT_UD_EP_OP_CREP); }
static UCS_CLASS_INIT_FUNC(uct_self_iface_t, uct_md_h md, uct_worker_h worker, const uct_iface_params_t *params, const uct_iface_config_t *tl_config) { ucs_status_t status; uct_self_iface_config_t *self_config = 0; ucs_trace_func("Creating a loop-back transport self=%p rxh=%lu", self, params->rx_headroom); if (strcmp(params->dev_name, UCT_SELF_NAME) != 0) { ucs_error("No device was found: %s", params->dev_name); return UCS_ERR_NO_DEVICE; } UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_self_iface_ops, md, worker, tl_config UCS_STATS_ARG(params->stats_root) UCS_STATS_ARG(UCT_SELF_NAME)); self_config = ucs_derived_of(tl_config, uct_self_iface_config_t); self->id = ucs_generate_uuid((uintptr_t)self); self->rx_headroom = params->rx_headroom; self->data_length = self_config->super.max_bcopy; self->release_desc.cb = uct_self_iface_release_desc; /* create a memory pool for data transferred */ status = uct_iface_mpool_init(&self->super, &self->msg_desc_mp, sizeof(uct_recv_desc_t) + self->rx_headroom + self->data_length, sizeof(uct_recv_desc_t) + self->rx_headroom, UCS_SYS_CACHE_LINE_SIZE, &self_config->mp, 256, ucs_empty_function, "self_msg_desc"); if (UCS_OK != status) { ucs_error("Failed to create a memory pool for the loop-back transport"); goto err; } /* set the message descriptor for the loop-back */ self->msg_cur_desc = ucs_mpool_get(&self->msg_desc_mp); VALGRIND_MAKE_MEM_DEFINED(self->msg_cur_desc, sizeof(*(self->msg_cur_desc))); if (NULL == self->msg_cur_desc) { ucs_error("Failed to get the first descriptor in loop-back MP storage"); status = UCS_ERR_NO_RESOURCE; goto destroy_mpool; } ucs_debug("Created a loop-back iface. id=0x%lx, desc=%p, len=%u, tx_hdr=%lu", self->id, self->msg_cur_desc, self->data_length, self->rx_headroom); return UCS_OK; destroy_mpool: ucs_mpool_cleanup(&self->msg_desc_mp, 1); err: return status; }
static UCS_CLASS_INIT_FUNC(uct_dc_mlx5_ep_t, uct_iface_t *tl_iface, const uct_device_addr_t *dev_addr, const uct_iface_addr_t *iface_addr) { uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); const uct_ib_address_t *ib_addr = (const uct_ib_address_t *)dev_addr; const uct_dc_iface_addr_t *if_addr = (const uct_dc_iface_addr_t *)iface_addr; ucs_status_t status; int is_global; ucs_trace_func(""); UCS_CLASS_CALL_SUPER_INIT(uct_dc_ep_t, &iface->super, if_addr); status = uct_ud_mlx5_iface_get_av(&iface->super.super.super, &iface->ud_common, ib_addr, iface->super.super.super.path_bits[0], &self->av, NULL, &is_global); if (status != UCS_OK) { return UCS_ERR_INVALID_ADDR; } self->av.dqp_dct |= htonl(uct_ib_unpack_uint24(if_addr->qp_num)); ucs_debug("created ep %p on iface %p", self, iface); return UCS_OK; }
static ucs_arbiter_cb_result_t uct_dc_mlx5_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, ucs_arbiter_elem_t *elem, void *arg) { uct_purge_cb_args_t *cb_args = arg; uct_pending_purge_callback_t cb = cb_args->cb; uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); uct_rc_fc_request_t *freq = ucs_derived_of(req, uct_rc_fc_request_t); uct_dc_mlx5_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_dc_mlx5_ep_t, arb_group); if (ucs_likely(req->func != uct_dc_mlx5_iface_fc_grant)){ if (cb != NULL) { cb(req, cb_args->arg); } else { ucs_debug("ep=%p cancelling user pending request %p", ep, req); } } else { /* User callback should not be called for FC messages. * Just return pending request memory to the pool */ ucs_mpool_put(freq); } return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; }
ucs_status_t ucp_ep_new(ucp_worker_h worker, uint64_t dest_uuid, const char *peer_name, const char *message, ucp_ep_h *ep_p) { ucp_ep_h ep; ep = ucs_calloc(1, sizeof(*ep), "ucp ep"); if (ep == NULL) { ucs_error("Failed to allocate ep"); return UCS_ERR_NO_MEMORY; } ep->worker = worker; ep->uct_ep = NULL; ep->config.max_short_egr = SIZE_MAX; ep->config.max_bcopy_egr = SIZE_MAX; ep->config.max_short_put = SIZE_MAX; ep->config.max_bcopy_put = SIZE_MAX; ep->config.max_bcopy_get = SIZE_MAX; ep->dest_uuid = dest_uuid; ep->rsc_index = -1; ep->dst_pd_index = -1; ep->state = 0; sglib_hashed_ucp_ep_t_add(worker->ep_hash, ep); #if ENABLE_DEBUG_DATA ucs_snprintf_zero(ep->peer_name, UCP_PEER_NAME_MAX, "%s", peer_name); #endif ucs_debug("created ep %p to %s 0x%"PRIx64"->0x%"PRIx64" %s", ep, ucp_ep_peer_name(ep), worker->uuid, ep->dest_uuid, message); *ep_p = ep; return UCS_OK; }
ucs_status_t progress_remote_cq(uct_ugni_smsg_iface_t *iface) { gni_return_t ugni_rc; gni_cq_entry_t event_data; uct_ugni_ep_t tl_ep; uct_ugni_ep_t *ugni_ep; uct_ugni_smsg_ep_t *ep; ugni_rc = GNI_CqGetEvent(iface->remote_cq, &event_data); if(GNI_RC_NOT_DONE == ugni_rc){ return UCS_OK; } if (GNI_RC_SUCCESS != ugni_rc || !GNI_CQ_STATUS_OK(event_data) || GNI_CQ_OVERRUN(event_data)) { if(GNI_RC_ERROR_RESOURCE == ugni_rc || (GNI_RC_SUCCESS == ugni_rc && GNI_CQ_OVERRUN(event_data))){ ucs_debug("Detected remote CQ overrun. ungi_rc = %d [%s]", ugni_rc, gni_err_str[ugni_rc]); uct_ugni_smsg_handle_remote_overflow(iface); return UCS_OK; } ucs_error("GNI_CqGetEvent falied with unhandled error. Error status %s %d ", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_IO_ERROR; } tl_ep.hash_key = GNI_CQ_GET_INST_ID(event_data); ugni_ep = sglib_hashed_uct_ugni_ep_t_find_member(iface->super.eps, &tl_ep); ep = ucs_derived_of(ugni_ep, uct_ugni_smsg_ep_t); process_mbox(iface, ep); return UCS_INPROGRESS; }
/* send a signal to remote interface using Unix-domain socket */ static ucs_status_t uct_mm_ep_signal_remote(uct_mm_ep_t *ep, uct_mm_iface_conn_signal_t sig) { uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t); int ret; /** * Send connect message to remote interface */ for (;;) { ret = sendto(iface->signal_fd, &sig, sizeof(sig), 0, (const struct sockaddr*)&ep->cached_signal_sockaddr, ep->cached_signal_addrlen); if (ret >= 0) { ucs_assert(ret == sizeof(sig)); return UCS_OK; } else if (errno == EAGAIN) { /* If sending a signal has failed, retry. * Note the by default the receiver might have a limited backlog, * on Linux systems it is net.unix.max_dgram_qlen (10 by default). */ uct_mm_iface_recv_messages(iface); } else { if ((sig == UCT_MM_IFACE_SIGNAL_DISCONNECT) && (errno == ECONNREFUSED)) { ucs_debug("failed to send disconnect signal: connection refused"); } else { ucs_error("failed to send %sconnect signal: %m", (sig == UCT_MM_IFACE_SIGNAL_CONNECT) ? "" : "dis"); } return UCS_ERR_IO_ERROR; } } }
void uct_dc_ep_release(uct_dc_ep_t *ep) { ucs_assert_always(ep->state == UCT_DC_EP_INVALID); ucs_debug("release dc_ep %p", ep); ucs_list_del(&ep->list); ucs_free(ep); }
static inline ucs_status_t uct_ugni_post_rdma(uct_ugni_rdma_iface_t *iface, uct_ugni_ep_t *ep, uct_ugni_base_desc_t *rdma) { gni_return_t ugni_rc; if (ucs_unlikely(!uct_ugni_can_send(ep))) { ucs_mpool_put(rdma); return UCS_ERR_NO_RESOURCE; } ugni_rc = GNI_PostRdma(ep->ep, &rdma->desc); if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { ucs_mpool_put(rdma); if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) { ucs_debug("GNI_PostRdma failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_NO_RESOURCE; } else { ucs_error("GNI_PostRdma failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_IO_ERROR; } } ++ep->outstanding; ++iface->super.outstanding; return UCS_INPROGRESS; }
static void UCS_F_CTOR ucs_init() { ucs_check_cpu_flags(); ucs_log_early_init(); /* Must be called before all others */ ucs_global_opts_init(); ucs_log_init(); #if ENABLE_STATS ucs_stats_init(); #endif ucs_memtrack_init(); ucs_debug_init(); ucs_profile_global_init(); ucs_async_global_init(); ucs_debug("%s loaded at 0x%lx", ucs_debug_get_lib_path(), ucs_debug_get_lib_base_addr()); ucs_debug("cmd line: %s", ucs_get_process_cmdline()); }
static ucs_status_t get_nic_address(uct_ugni_device_t *dev_p) { int alps_addr = -1; int alps_dev_id = -1; int i; char *token, *pmi_env; pmi_env = getenv("PMI_GNI_DEV_ID"); if (NULL == pmi_env) { gni_return_t ugni_rc; ugni_rc = GNI_CdmGetNicAddress(dev_p->device_id, &dev_p->address, &dev_p->cpu_id); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_CdmGetNicAddress failed, device %d, Error status: %s %d", dev_p->device_id, gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_NO_DEVICE; } CPU_SET(dev_p->cpu_id, &(dev_p->cpu_mask)); ucs_debug("(GNI) NIC address: %d", dev_p->address); } else { while ((token = strtok(pmi_env, ":")) != NULL) { alps_dev_id = atoi(token); if (alps_dev_id == dev_p->device_id) { break; } pmi_env = NULL; } ucs_assert(alps_dev_id != -1); pmi_env = getenv("PMI_GNI_LOC_ADDR"); ucs_assert(NULL != pmi_env); i = 0; while ((token = strtok(pmi_env, ":")) != NULL) { if (i == alps_dev_id) { alps_addr = atoi(token); break; } pmi_env = NULL; ++i; } ucs_assert(alps_addr != -1); dev_p->address = alps_addr; ucs_debug("(PMI) NIC address: %d", dev_p->address); } return UCS_OK; }
ucs_status_t ucp_ep_new(ucp_worker_h worker, uint64_t dest_uuid, const char *peer_name, const char *message, ucp_ep_h *ep_p) { ucs_status_t status; ucp_ep_config_key_t key; ucp_ep_h ep; khiter_t hash_it; int hash_extra_status = 0; ep = ucs_calloc(1, sizeof(*ep), "ucp ep"); if (ep == NULL) { ucs_error("Failed to allocate ep"); status = UCS_ERR_NO_MEMORY; goto err; } /* EP configuration without any lanes */ memset(&key, 0, sizeof(key)); key.rma_lane_map = 0; key.amo_lane_map = 0; key.reachable_md_map = 0; key.am_lane = UCP_NULL_RESOURCE; key.rndv_lane = UCP_NULL_RESOURCE; key.wireup_msg_lane = UCP_NULL_LANE; key.num_lanes = 0; memset(key.amo_lanes, UCP_NULL_LANE, sizeof(key.amo_lanes)); ep->worker = worker; ep->dest_uuid = dest_uuid; ep->cfg_index = ucp_worker_get_ep_config(worker, &key); ep->am_lane = UCP_NULL_LANE; ep->flags = 0; #if ENABLE_DEBUG_DATA ucs_snprintf_zero(ep->peer_name, UCP_WORKER_NAME_MAX, "%s", peer_name); #endif hash_it = kh_put(ucp_worker_ep_hash, &worker->ep_hash, dest_uuid, &hash_extra_status); if (ucs_unlikely(hash_it == kh_end(&worker->ep_hash))) { ucs_error("Hash failed with ep %p to %s 0x%"PRIx64"->0x%"PRIx64" %s " "with status %d", ep, peer_name, worker->uuid, ep->dest_uuid, message, hash_extra_status); status = UCS_ERR_NO_RESOURCE; goto err_free_ep; } kh_value(&worker->ep_hash, hash_it) = ep; *ep_p = ep; ucs_debug("created ep %p to %s 0x%"PRIx64"->0x%"PRIx64" %s", ep, peer_name, worker->uuid, ep->dest_uuid, message); return UCS_OK; err_free_ep: ucs_free(ep); err: return status; }