/* add new handler to the table */
static ucs_status_t ucs_async_handler_add(ucs_async_handler_t *handler)
{
    int hash_extra_status;
    ucs_status_t status;
    khiter_t hash_it;

    pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock);

    ucs_assert_always(handler->refcount == 1);
    hash_it = kh_put(ucs_async_handler, &ucs_async_global_context.handlers,
                     handler->id, &hash_extra_status);
    if (hash_extra_status == -1) {
        ucs_error("Failed to add async handler " UCS_ASYNC_HANDLER_FMT " to hash",
                  UCS_ASYNC_HANDLER_ARG(handler));
        status = UCS_ERR_NO_MEMORY;
        goto out_unlock;
    } else if (hash_extra_status == 0) {
        ucs_error("Async handler " UCS_ASYNC_HANDLER_FMT " exists - cannot add %s()",
                  UCS_ASYNC_HANDLER_ARG(kh_value(&ucs_async_global_context.handlers,
                                                 hash_it)),
                  ucs_debug_get_symbol_name(handler->cb));
        status = UCS_ERR_ALREADY_EXISTS;
        goto out_unlock;
    }

    ucs_assert_always(!ucs_async_handler_kh_is_end(hash_it));
    kh_value(&ucs_async_global_context.handlers, hash_it) = handler;
    ucs_debug("added async handler " UCS_ASYNC_HANDLER_FMT " to hash",
              UCS_ASYNC_HANDLER_ARG(handler));
    status = UCS_OK;

out_unlock:
    pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock);
    return status;
}
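A minimal, self-contained sketch of the khash insertion idiom the handler table relies on (assuming the stock khash.h API; the "m32" map name, key 17, and value 42 are illustrative only):

#include <stdio.h>
#include "khash.h"

KHASH_MAP_INIT_INT(m32, int)            /* hash map: int keys -> int values */

int main(void)
{
    khash_t(m32) *h = kh_init(m32);
    int ret;
    khiter_t it;

    /* kh_put() sets ret to -1 on allocation failure, 0 if the key was
     * already present, and >0 if a fresh (empty or deleted) bucket was
     * used - the same codes ucs_async_handler_add() branches on. */
    it = kh_put(m32, h, 17, &ret);
    if (ret > 0) {
        kh_value(h, it) = 42;
    }

    it = kh_get(m32, h, 17);
    if (it != kh_end(h)) {
        printf("17 -> %d\n", kh_value(h, it));
    }
    kh_destroy(m32, h);
    return 0;
}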
uct_ud_send_skb_t *uct_ud_ep_prepare_crep(uct_ud_ep_t *ep)
{
    uct_ud_send_skb_t *skb;
    uct_ud_neth_t *neth;
    uct_ud_ctl_hdr_t *crep;
    uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                           uct_ud_iface_t);

    ucs_assert_always(ep->dest_ep_id != UCT_UD_EP_NULL_ID);
    ucs_assert_always(ep->ep_id != UCT_UD_EP_NULL_ID);

    skb = uct_ud_iface_get_tx_skb(iface, ep);
    if (!skb) {
        return NULL;
    }

    neth = skb->neth;
    uct_ud_neth_init_data(ep, neth);

    neth->packet_type  = ep->dest_ep_id;
    neth->packet_type |= (UCT_UD_PACKET_FLAG_ACK_REQ|UCT_UD_PACKET_FLAG_CTL);

    crep = (uct_ud_ctl_hdr_t *)(neth + 1);
    crep->type               = UCT_UD_PACKET_CREP;
    crep->conn_rep.src_ep_id = ep->ep_id;

    skb->len = sizeof(*neth) + sizeof(*crep);
    UCT_UD_EP_HOOK_CALL_TX(ep, skb->neth);
    uct_ud_iface_complete_tx_skb_nolog(iface, ep, skb);
    /* uct_ud_ep_notify(ep); TODO: allow to send data on CREQ RX */
    return skb;
}
static void uct_ud_ep_rx_creq(uct_ud_iface_t *iface, uct_ud_neth_t *neth)
{
    uct_ud_ep_t *ep;
    uct_ud_ctl_hdr_t *ctl = (uct_ud_ctl_hdr_t *)(neth + 1);

    ucs_assert_always(ctl->type == UCT_UD_PACKET_CREQ);

    ep = uct_ud_iface_cep_lookup(iface, &ctl->conn_req.ib_addr,
                                 ctl->conn_req.conn_id);
    if (!ep) {
        ep = uct_ud_ep_create_passive(iface, ctl);
        ucs_assert_always(ep != NULL);
        ep->rx.ooo_pkts.head_sn = neth->psn;
    } else {
        if (ep->dest_ep_id == UCT_UD_EP_NULL_ID) {
            /* simultaneous CREQ */
            ep->dest_ep_id          = ctl->conn_req.ib_addr.id;
            ep->rx.ooo_pkts.head_sn = neth->psn;
            ucs_debug("created ep=%p (iface=%p conn_id=%d ep_id=%d, dest_ep_id=%d rx_psn=%u)",
                      ep, iface, ep->conn_id, ep->ep_id, ep->dest_ep_id,
                      ep->rx.ooo_pkts.head_sn);
        }
    }

    ucs_assert_always(ctl->conn_req.conn_id == ep->conn_id);
    ucs_assert_always(ctl->conn_req.ib_addr.id == ep->dest_ep_id);
    /* CREQ must always carry the same psn */
    ucs_assert_always(ep->rx.ooo_pkts.head_sn == neth->psn);

    /* schedule connection reply op */
    UCT_UD_EP_HOOK_CALL_RX(ep, neth);
    uct_ud_iface_queue_pending(iface, ep, UCT_UD_EP_OP_CREP);
}
static void uct_ud_ep_rx_ctl(uct_ud_iface_t *iface, uct_ud_ep_t *ep,
                             uct_ud_ctl_hdr_t *ctl)
{
    ucs_trace_func("");
    ucs_assert_always(ctl->type == UCT_UD_PACKET_CREP);
    /* note that duplicate CREPs are discarded earlier */
    ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID ||
                      ep->dest_ep_id == ctl->conn_rep.src_ep_id);
    ep->dest_ep_id = ctl->conn_rep.src_ep_id;
    /* TODO notify */
}
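Read together with uct_ud_ep_prepare_creq() and uct_ud_ep_prepare_crep() elsewhere in this section, these handlers form a two-message connection handshake. A summary of the flow, as reconstructed from these snippets alone:

/*
 * Active side                          Passive side
 * -----------                          ------------
 * uct_ud_ep_prepare_creq()   --CREQ--> uct_ud_ep_rx_creq()
 *   dest_ep_id still NULL                creates a passive ep if none exists
 *                                        (uct_ud_ep_create_passive) and
 *                                        queues UCT_UD_EP_OP_CREP
 * uct_ud_ep_rx_ctl()         <--CREP-- uct_ud_ep_prepare_crep()
 *   records dest_ep_id from              echoes its own ep_id in
 *   conn_rep.src_ep_id                   conn_rep.src_ep_id
 */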
ucs_status_t uct_rc_verbs_ep_connect_to_ep(uct_ep_h tl_ep,
                                           const uct_device_addr_t *dev_addr,
                                           const uct_ep_addr_t *ep_addr)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    const uct_ib_address_t *ib_addr    = (const uct_ib_address_t *)dev_addr;
    const uct_rc_ep_address_t *rc_addr = (const uct_rc_ep_address_t*)ep_addr;
    struct ibv_ah_attr ah_attr;
    ucs_status_t status;
    uint32_t qp_num;

    uct_ib_iface_fill_ah_attr(&iface->super.super, ib_addr,
                              ep->super.path_bits, &ah_attr);

#if HAVE_IBV_EX_HW_TM
    if (UCT_RC_VERBS_TM_ENABLED(iface)) {
        /* For HW TM we need 2 QPs, one of which will be used by the device for
         * RNDV offload (for issuing RDMA reads and sending RNDV ACK). No WQEs
         * should be posted to the send side of the QP which is owned by device. */
        uct_rc_verbs_ep_tm_address_t *tm_addr =
            ucs_derived_of(rc_addr, uct_rc_verbs_ep_tm_address_t);

        ucs_assert_always(tm_addr->super.type == UCT_RC_EP_ADDR_TYPE_TM);

        status = uct_rc_iface_qp_connect(&iface->super, ep->tm_qp,
                                         uct_ib_unpack_uint24(rc_addr->qp_num),
                                         &ah_attr);
        if (status != UCS_OK) {
            return status;
        }

        /* Need to connect local ep QP to the one owned by device
         * (and bound to XRQ) on the peer. */
        qp_num = uct_ib_unpack_uint24(tm_addr->tm_qp_num);
    } else
#endif
    {
        ucs_assert_always(rc_addr->type == UCT_RC_EP_ADDR_TYPE_BASIC);
        qp_num = uct_ib_unpack_uint24(rc_addr->qp_num);
    }

    status = uct_rc_iface_qp_connect(&iface->super, ep->super.txqp.qp, qp_num,
                                     &ah_attr);
    if (status != UCS_OK) {
        return status;
    }

    ep->super.atomic_mr_offset = uct_ib_md_atomic_offset(rc_addr->atomic_mr_id);
    return UCS_OK;
}
/* add new handler to the table */
static ucs_status_t ucs_async_handler_add(int min_id, int max_id,
                                          ucs_async_handler_t *handler)
{
    int hash_extra_status;
    ucs_status_t status;
    khiter_t hash_it;
    int i, id;

    pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock);

    handler->id = -1;
    ucs_assert_always(handler->refcount == 1);

    /*
     * Search for an empty key in the range [min_id, max_id)
     * ucs_async_global_context.handler_id is used to generate "unique" keys.
     */
    for (i = min_id; i < max_id; ++i) {
        id = min_id +
             (ucs_atomic_fadd32(&ucs_async_global_context.handler_id, 1) %
              (max_id - min_id));
        hash_it = kh_put(ucs_async_handler, &ucs_async_global_context.handlers,
                         id, &hash_extra_status);
        if (hash_extra_status == -1) {
            ucs_error("Failed to add async handler " UCS_ASYNC_HANDLER_FMT
                      " to hash", UCS_ASYNC_HANDLER_ARG(handler));
            status = UCS_ERR_NO_MEMORY;
            goto out_unlock;
        } else if (hash_extra_status != 0) {
            handler->id = id;
            ucs_assert(id != -1);
            break;
        }
    }

    if (handler->id == -1) {
        ucs_error("Cannot add async handler %s() - id range [%d..%d) is full",
                  ucs_debug_get_symbol_name(handler->cb), min_id, max_id);
        status = UCS_ERR_ALREADY_EXISTS;
        goto out_unlock;
    }

    ucs_assert_always(!ucs_async_handler_kh_is_end(hash_it));
    kh_value(&ucs_async_global_context.handlers, hash_it) = handler;
    ucs_debug("added async handler " UCS_ASYNC_HANDLER_FMT " to hash",
              UCS_ASYNC_HANDLER_ARG(handler));
    status = UCS_OK;

out_unlock:
    pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock);
    return status;
}
static void ucm_malloc_mmaped_ptr_add(void *ptr)
{
    int hash_extra_status;
    khiter_t hash_it;

    ucs_spin_lock(&ucm_malloc_hook_state.lock);
    hash_it = kh_put(mmap_ptrs, &ucm_malloc_hook_state.ptrs, ptr,
                     &hash_extra_status);
    ucs_assert_always(hash_extra_status >= 0);
    ucs_assert_always(hash_it != kh_end(&ucm_malloc_hook_state.ptrs));
    ucs_spin_unlock(&ucm_malloc_hook_state.lock);
}
ucs_status_t uct_ud_ep_connect_to_ep(uct_ud_ep_t *ep,
                                     const struct sockaddr *addr)
{
    uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                           uct_ud_iface_t);
    uct_ib_device_t *dev  = uct_ib_iface_device(&iface->super);
    const uct_sockaddr_ib_t *ib_addr = (uct_sockaddr_ib_t *)addr;

    ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID);
    ucs_trace_func("");

    ep->dest_ep_id = ib_addr->id;
    ep->dest_qpn   = ib_addr->qp_num;

    uct_ud_ep_reset(ep);

    ucs_debug("%s:%d slid=%d qpn=%d ep=%u connected to dlid=%d qpn=%d ep=%u",
              ibv_get_device_name(dev->ibv_context->device),
              iface->super.port_num,
              dev->port_attr[iface->super.port_num - dev->first_port].lid,
              iface->qp->qp_num, ep->ep_id,
              ib_addr->lid, ep->dest_qpn, ep->dest_ep_id);
    return UCS_OK;
}
static size_t ucp_tag_rndv_pack_rkey(ucp_request_t *sreq,
                                     ucp_rndv_rts_hdr_t *rndv_rts_hdr)
{
    ucp_ep_h ep        = sreq->send.ep;
    size_t packed_rkey = 0;
    ucs_status_t status;

    ucs_assert(UCP_DT_IS_CONTIG(sreq->send.datatype));

    /* Check if the sender needs to register the send buffer -
     * is its datatype contiguous and does the receive side need it */
    if ((ucp_ep_is_rndv_lane_present(ep)) &&
        (ucp_ep_rndv_md_flags(ep) & UCT_MD_FLAG_NEED_RKEY)) {
        status = ucp_request_send_buffer_reg(sreq, ucp_ep_get_rndv_get_lane(ep));
        ucs_assert_always(status == UCS_OK);

        /* if the send buffer was registered, send the rkey */
        UCS_PROFILE_CALL(uct_md_mkey_pack,
                         ucp_ep_md(ep, ucp_ep_get_rndv_get_lane(ep)),
                         sreq->send.state.dt.contig.memh, rndv_rts_hdr + 1);
        rndv_rts_hdr->flags |= UCP_RNDV_RTS_FLAG_PACKED_RKEY;
        packed_rkey = ucp_ep_md_attr(ep, ucp_ep_get_rndv_get_lane(ep))->rkey_packed_size;
    }

    return packed_rkey;
}
void uct_dc_ep_release(uct_dc_ep_t *ep)
{
    ucs_assert_always(ep->state == UCT_DC_EP_INVALID);
    ucs_debug("release dc_ep %p", ep);
    ucs_list_del(&ep->list);
    ucs_free(ep);
}
static int ucp_is_resource_in_device_list(uct_tl_resource_desc_t *resource,
                                          const str_names_array_t *devices,
                                          uint64_t *masks, int index)
{
    int device_enabled, config_idx;

    if (devices[index].count == 0) {
        return 0;
    }

    if (!strcmp(devices[index].names[0], "all")) {
        /* if the user's list is 'all', use all the available resources */
        device_enabled = 1;
        masks[index]   = -1; /* using all available devices. can satisfy 'all' */
    } else {
        /* go over the device list from the user and check (against the
         * available resources) which can be satisfied */
        device_enabled = 0;
        ucs_assert_always(devices[index].count <= 64); /* Using uint64_t bitmap */
        config_idx = ucp_str_array_search((const char**)devices[index].names,
                                          devices[index].count,
                                          resource->dev_name);
        if (config_idx >= 0) {
            device_enabled  = 1;
            masks[index]   |= UCS_BIT(config_idx);
        }
    }

    return device_enabled;
}
static int ucp_address_iter_next(void **iter, struct sockaddr **iface_addr_p,
                                 char *tl_name, ucp_rsc_index_t *pd_index)
{
    char *ptr = *iter;
    uint8_t iface_addr_len;
    uint8_t tl_name_len;

    iface_addr_len = *(uint8_t*)(ptr++);
    if (iface_addr_len == 0) {
        return 0;
    }

    *iface_addr_p = (struct sockaddr*)ptr;
    ptr          += iface_addr_len;

    tl_name_len = *(uint8_t*)(ptr++);
    ucs_assert_always(tl_name_len < UCT_TL_NAME_MAX);
    memcpy(tl_name, ptr, tl_name_len);
    tl_name[tl_name_len] = '\0';
    ptr += tl_name_len;

    *pd_index = *((ucp_rsc_index_t*)ptr++);
    *iter     = ptr;
    return 1;
}
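For reference, the packed entry layout this iterator consumes, as implied by the reads above (entries are back-to-back; a zero iface_addr_len terminates the list):

/*
 *   uint8_t          iface_addr_len                 - 0 ends the list
 *   uint8_t          iface_addr[iface_addr_len]
 *   uint8_t          tl_name_len                    - < UCT_TL_NAME_MAX
 *   char             tl_name[tl_name_len]           - no NUL on the wire
 *   ucp_rsc_index_t  pd_index
 */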
static UCS_CLASS_CLEANUP_FUNC(uct_dc_ep_t)
{
    uct_dc_iface_t *iface = ucs_derived_of(self->super.super.iface,
                                           uct_dc_iface_t);

    uct_dc_ep_pending_purge(&self->super.super, NULL, NULL);
    ucs_arbiter_group_cleanup(&self->arb_group);
    uct_rc_fc_cleanup(&self->fc);

    ucs_assert_always(self->state != UCT_DC_EP_INVALID);

    if (self->dci == UCT_DC_EP_NO_DCI) {
        return;
    }

    /* TODO: this is good for dcs policy only.
     * Need to change if eps share dci */
    ucs_assertv_always(uct_dc_iface_dci_has_outstanding(iface, self->dci),
                       "iface (%p) ep (%p) dci leak detected: dci=%d",
                       iface, self, self->dci);

    /* we can handle it, but a well-behaved app should not do this */
    ucs_warn("ep (%p) is destroyed with %d outstanding ops",
             self, (int16_t)iface->super.config.tx_qp_len -
             uct_rc_txqp_available(&iface->tx.dcis[self->dci].txqp));

    uct_rc_txqp_purge_outstanding(&iface->tx.dcis[self->dci].txqp,
                                  UCS_ERR_CANCELED, 1);
    iface->tx.dcis[self->dci].ep = NULL;
}
void uct_dc_mlx5_ep_release(uct_dc_mlx5_ep_t *ep)
{
    ucs_assert_always(!(ep->flags & UCT_DC_MLX5_EP_FLAG_VALID));
    ucs_debug("release dc_mlx5_ep %p", ep);
    ucs_list_del(&ep->list);
    ucs_free(ep);
}
static ucs_status_t recieve_datagram(uct_ugni_udt_iface_t *iface, uint64_t id,
                                     uct_ugni_udt_ep_t **ep_out)
{
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;
    uct_ugni_udt_ep_t *ep;
    gni_ep_handle_t gni_ep;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;

    ucs_trace_func("iface=%p, id=%lx", iface, id);

    if (UCT_UGNI_UDT_ANY == id) {
        ep     = NULL;
        gni_ep = iface->ep_any;
        desc   = iface->desc_any;
    } else {
        ep     = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                                uct_ugni_udt_ep_t);
        gni_ep = ep->super.ep;
        desc   = ep->posted_desc;
    }
    *ep_out = ep;

    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_EpPostDataWaitById(gni_ep, id, -1, &post_state, &rem_addr,
                                     &rem_id);
    uct_ugni_device_unlock(&iface->super.cdm);

    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, id=%lu Error status: %s %d",
                  id, gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_IO_ERROR;
    }

    if (GNI_POST_TERMINATED == post_state) {
        return UCS_ERR_CANCELED;
    }

    if (GNI_POST_COMPLETED != post_state) {
        ucs_error("GNI_EpPostDataWaitById gave unexpected response: %u",
                  post_state);
        return UCS_ERR_IO_ERROR;
    }

    if (UCT_UGNI_UDT_ANY != id) {
        --iface->super.outstanding;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);
    ucs_trace("Got datagram id: %lu type: %i len: %i am_id: %i",
              id, header->type, header->length, header->am_id);

    if (UCT_UGNI_UDT_PAYLOAD != header->type) {
        /* ack message, no data */
        ucs_assert_always(NULL != ep);
        ucs_mpool_put(ep->posted_desc);
        uct_ugni_check_flush(ep->desc_flush_group);
        ep->posted_desc = NULL;
        return UCS_OK;
    }

    return UCS_INPROGRESS;
}
static ucs_status_t uct_knem_mem_dereg(uct_md_h md, uct_mem_h memh)
{
    int rc;
    uct_knem_key_t *key    = (uct_knem_key_t *)memh;
    uct_knem_md_t *knem_md = (uct_knem_md_t *)md;
    int knem_fd            = knem_md->knem_fd;

    ucs_assert_always(knem_fd > -1);
    ucs_assert_always(key->cookie != 0);
    ucs_assert_always(key->address != 0);

    rc = ioctl(knem_fd, KNEM_CMD_DESTROY_REGION, &key->cookie);
    if (rc < 0) {
        ucs_error("KNEM destroy region failed, err = %m");
    }

    ucs_free(key);
    return UCS_OK;
}
static uct_ud_ep_t *uct_ud_ep_create_passive(uct_ud_iface_t *iface,
                                             uct_ud_ctl_hdr_t *ctl)
{
    uct_ud_ep_t *ep;
    ucs_status_t status;
    uct_ep_t *ep_h;
    uct_iface_t *iface_h = &iface->super.super.super;

    /* create new endpoint */
    status = iface_h->ops.ep_create(iface_h, &ep_h);
    ucs_assert_always(status == UCS_OK);
    ep = ucs_derived_of(ep_h, uct_ud_ep_t);

    status = iface_h->ops.ep_connect_to_ep(ep_h,
                                           (struct sockaddr *)&ctl->conn_req.ib_addr);
    ucs_assert_always(status == UCS_OK);

    status = uct_ud_iface_cep_insert(iface, &ctl->conn_req.ib_addr, ep,
                                     ctl->conn_req.conn_id);
    ucs_assert_always(status == UCS_OK);
    return ep;
}
/*
 * If a certain system constant (name) is undefined on the underlying system,
 * the sysconf routine returns -1. ucs_sysconf returns the negative value to
 * the user, and the user is responsible to define a default value or abort.
 *
 * If an error occurs, sysconf modifies errno and ucs_sysconf aborts.
 *
 * Otherwise, a non-negative value is returned.
 */
static long ucs_sysconf(int name)
{
    long rc;

    errno = 0;
    rc = sysconf(name);
    ucs_assert_always(errno == 0);

    return rc;
}
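A minimal caller sketch under the contract above (the function name and the 4096-byte fallback are illustrative, not from the source):

#include <unistd.h>

/* Hypothetical caller: supply a default when the constant is undefined
 * on this system, as the contract above requires. */
static long example_page_size(void)
{
    long value = ucs_sysconf(_SC_PAGESIZE);
    return (value < 0) ? 4096 : value;
}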
static void uct_ugni_udt_iface_release_desc(uct_recv_desc_t *self, void *desc)
{
    uct_ugni_udt_desc_t *ugni_desc;
    uct_ugni_udt_iface_t *iface = ucs_container_of(self, uct_ugni_udt_iface_t,
                                                   release_desc);

    ugni_desc = (uct_ugni_udt_desc_t *)((uct_recv_desc_t *)desc - 1);
    ucs_assert_always(NULL != ugni_desc);
    uct_ugni_udt_reset_desc(ugni_desc, iface);
    ucs_mpool_put(ugni_desc);
}
static ucs_status_t uct_knem_mem_reg(uct_md_h md, void *address, size_t length,
                                     uct_mem_h *memh_p)
{
    int rc;
    struct knem_cmd_create_region create;
    struct knem_cmd_param_iovec knem_iov[1];
    uct_knem_md_t *knem_md = (uct_knem_md_t *)md;
    int knem_fd            = knem_md->knem_fd;
    uct_knem_key_t *key;

    ucs_assert_always(knem_fd > -1);

    key = ucs_malloc(sizeof(uct_knem_key_t), "uct_knem_key_t");
    if (NULL == key) {
        ucs_error("Failed to allocate memory for uct_knem_key_t");
        return UCS_ERR_NO_MEMORY;
    }

    knem_iov[0].base = (uintptr_t)address;
    knem_iov[0].len  = length;

    memset(&create, 0, sizeof(struct knem_cmd_create_region));
    create.iovec_array = (uintptr_t)&knem_iov[0];
    create.iovec_nr    = 1;
    create.flags       = 0;
    create.protection  = PROT_READ | PROT_WRITE;

    rc = ioctl(knem_fd, KNEM_CMD_CREATE_REGION, &create);
    if (rc < 0) {
        ucs_error("KNEM create region failed: %m");
        ucs_free(key);
        return UCS_ERR_IO_ERROR;
    }

    ucs_assert_always(create.cookie != 0);
    key->cookie  = create.cookie;
    key->address = (uintptr_t)address;

    *memh_p = key;
    return UCS_OK;
}
uct_ud_send_skb_t *uct_ud_ep_prepare_creq(uct_ud_ep_t *ep)
{
    uct_ud_send_skb_t *skb;
    uct_ud_neth_t *neth;
    uct_ud_ctl_hdr_t *creq;
    uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                           uct_ud_iface_t);
    uct_sockaddr_ib_t iface_addr;
    ucs_status_t status;

    ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID);
    ucs_assert_always(ep->ep_id != UCT_UD_EP_NULL_ID);

    memset(&iface_addr, 0, sizeof(iface_addr)); /* make coverity happy */

    status = uct_ud_iface_get_address(&iface->super.super.super,
                                      (struct sockaddr *)&iface_addr);
    if (status != UCS_OK) {
        return NULL;
    }

    skb = uct_ud_iface_get_tx_skb(iface, ep);
    if (!skb) {
        return NULL;
    }

    neth = skb->neth;
    uct_ud_neth_init_data(ep, neth);

    neth->packet_type  = UCT_UD_EP_NULL_ID;
    neth->packet_type |= UCT_UD_PACKET_FLAG_CTL;

    creq = (uct_ud_ctl_hdr_t *)(neth + 1);
    creq->type                    = UCT_UD_PACKET_CREQ;
    creq->conn_req.ib_addr.qp_num = iface_addr.qp_num;
    creq->conn_req.ib_addr.lid    = iface_addr.lid;
    creq->conn_req.ib_addr.id     = ep->ep_id;
    creq->conn_req.conn_id        = ep->conn_id;

    skb->len = sizeof(*neth) + sizeof(*creq);
    return skb;
}
static int ucp_is_resource_enabled(uct_tl_resource_desc_t *resource,
                                   const ucp_config_t *config,
                                   uint64_t *devices_mask_p)
{
    int device_enabled, tl_enabled;
    unsigned config_idx;

    ucs_assert(config->devices.count > 0);
    if (!strcmp(config->devices.names[0], "all")) {
        /* if the user's list is 'all', use all the available resources */
        device_enabled  = 1;
        *devices_mask_p = 1;
    } else {
        /* go over the device list from the user and check (against the
         * available resources) which can be satisfied */
        device_enabled  = 0;
        *devices_mask_p = 0;
        ucs_assert_always(config->devices.count <= 64); /* Using uint64_t bitmap */
        for (config_idx = 0; config_idx < config->devices.count; ++config_idx) {
            if (!strcmp(config->devices.names[config_idx], resource->dev_name)) {
                device_enabled   = 1;
                /* set the single bit for this device index; UCS_MASK would
                 * wrongly set all lower bits as well */
                *devices_mask_p |= UCS_BIT(config_idx);
            }
        }
    }

    /* Disable the posix mmap and xpmem 'devices'. ONLY for now - use sysv for mm.
     * This will be removed after multi-rail is supported */
    if (!strcmp(resource->dev_name, "posix") ||
        !strcmp(resource->dev_name, "xpmem")) {
        device_enabled = 0;
    }

    ucs_assert(config->tls.count > 0);
    if (!strcmp(config->tls.names[0], "all")) {
        /* if the user's list is 'all', use all the available tls */
        tl_enabled = 1;
    } else {
        /* go over the tls list from the user and compare it against the
         * available resources */
        tl_enabled = 0;
        for (config_idx = 0; config_idx < config->tls.count; ++config_idx) {
            if (!strcmp(config->tls.names[config_idx], resource->tl_name)) {
                tl_enabled = 1;
                break;
            }
        }
    }

    ucs_trace(UCT_TL_RESOURCE_DESC_FMT " is %sabled",
              UCT_TL_RESOURCE_DESC_ARG(resource),
              (device_enabled && tl_enabled) ? "en" : "dis");
    return device_enabled && tl_enabled;
}
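A quick illustration of why the per-device bit is set with UCS_BIT rather than UCS_MASK (definitions as in ucs/sys/math.h: UCS_BIT(i) is 1ul << i, and UCS_MASK(i) is UCS_BIT(i) - 1):

/*
 *   UCS_BIT(3)  == 0b1000   - marks exactly device index 3
 *   UCS_MASK(3) == 0b0111   - would wrongly mark devices 0..2 instead
 */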
/**
 * dispatch requests waiting for dci allocation
 */
ucs_arbiter_cb_result_t
uct_dc_iface_dci_do_pending_wait(ucs_arbiter_t *arbiter,
                                 ucs_arbiter_elem_t *elem, void *arg)
{
    uct_dc_ep_t *ep       = ucs_container_of(ucs_arbiter_elem_group(elem),
                                             uct_dc_ep_t, arb_group);
    uct_dc_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                           uct_dc_iface_t);

    /**
     * stop if a dci can not be allocated,
     * else move the group to the dci arbiter
     */
    ucs_assert_always(ep->dci == UCT_DC_EP_NO_DCI);

    if (!uct_dc_iface_dci_can_alloc(iface)) {
        return UCS_ARBITER_CB_RESULT_STOP;
    }
    uct_dc_iface_dci_alloc(iface, ep);
    ucs_assert_always(ep->dci != UCT_DC_EP_NO_DCI);
    uct_dc_iface_dci_sched_tx(iface, ep);
    return UCS_ARBITER_CB_RESULT_DESCHED_GROUP;
}
ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh,
                           void **rkey_buffer_p, size_t *size_p)
{
    unsigned pd_index, uct_memh_index;
    void *rkey_buffer, *p;
    size_t size, pd_size;

    ucs_trace("packing rkeys for buffer %p memh %p pd_map 0x%"PRIx64,
              memh->address, memh, memh->pd_map);

    size = sizeof(uint64_t);
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        size += sizeof(uint8_t);
        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        ucs_assert_always(pd_size < UINT8_MAX);
        size += pd_size;
    }

    rkey_buffer = ucs_malloc(size, "ucp_rkey_buffer");
    if (rkey_buffer == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    p = rkey_buffer;

    /* Write the PD map */
    *(uint64_t*)p = memh->pd_map;
    p += sizeof(uint64_t);

    /* Write both size and rkey_buffer for each UCT rkey */
    uct_memh_index = 0;
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        if (!(memh->pd_map & UCS_BIT(pd_index))) {
            continue;
        }

        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        *((uint8_t*)p++) = pd_size;
        uct_pd_mkey_pack(context->pds[pd_index], memh->uct[uct_memh_index], p);

        ++uct_memh_index;
        p += pd_size;
    }

    *rkey_buffer_p = rkey_buffer;
    *size_p        = size;
    return UCS_OK;
}
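The resulting buffer layout, as written by the packing loops above:

/*
 *   uint64_t  pd_map                     - bitmap of PDs that packed an rkey
 *   for each bit set in pd_map, in ascending pd_index order:
 *     uint8_t  rkey_packed_size          - length of the blob that follows
 *     uint8_t  rkey[rkey_packed_size]    - PD-specific packed rkey
 */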
ucs_status_t uct_ib_mlx5_get_cq(struct ibv_cq *cq, uct_ib_mlx5_cq_t *mlx5_cq)
{
    unsigned cqe_size;
#if HAVE_DECL_IBV_MLX5_EXP_GET_CQ_INFO
    struct ibv_mlx5_cq_info ibv_cq_info;
    int ret;

    ret = ibv_mlx5_exp_get_cq_info(cq, &ibv_cq_info);
    if (ret != 0) {
        return UCS_ERR_NO_DEVICE;
    }

    mlx5_cq->cq_buf    = ibv_cq_info.buf;
    mlx5_cq->cq_ci     = 0;
    mlx5_cq->cq_length = ibv_cq_info.cqe_cnt;
    cqe_size           = ibv_cq_info.cqe_size;
#else
    struct mlx5_cq *mcq = ucs_container_of(cq, struct mlx5_cq, ibv_cq);
    int ret;

    if (mcq->cons_index != 0) {
        ucs_error("CQ consumer index is not 0 (%d)", mcq->cons_index);
        return UCS_ERR_NO_DEVICE;
    }

    mlx5_cq->cq_buf    = mcq->active_buf->buf;
    mlx5_cq->cq_ci     = 0;
    mlx5_cq->cq_length = mcq->ibv_cq.cqe + 1;
    cqe_size           = mcq->cqe_sz;
#endif

    /* Move buffer forward for 128b CQE, so we would get pointer to the 2nd
     * 64b when polling. */
    mlx5_cq->cq_buf += cqe_size - sizeof(struct mlx5_cqe64);

    ret = ibv_exp_cq_ignore_overrun(cq);
    if (ret != 0) {
        ucs_error("Failed to modify send CQ to ignore overrun: %s",
                  strerror(ret));
        return UCS_ERR_UNSUPPORTED;
    }

    mlx5_cq->cqe_size_log = ucs_ilog2(cqe_size);
    ucs_assert_always((1 << mlx5_cq->cqe_size_log) == cqe_size);
    return UCS_OK;
}
ucs_status_t uct_ud_verbs_ep_create_connected(uct_iface_h iface_h,
                                              const struct sockaddr *addr,
                                              uct_ep_h *new_ep_p)
{
    uct_ud_verbs_iface_t *iface = ucs_derived_of(iface_h, uct_ud_verbs_iface_t);
    uct_ud_verbs_ep_t *ep;
    uct_ud_ep_t *new_ud_ep;
    const uct_sockaddr_ib_t *if_addr = (const uct_sockaddr_ib_t *)addr;
    uct_ud_send_skb_t *skb;
    struct ibv_ah *ah;
    ucs_status_t status;

    uct_ud_enter(&iface->super);
    status = uct_ud_ep_create_connected_common(&iface->super, if_addr,
                                               &new_ud_ep, &skb);
    if (status != UCS_OK) {
        /* drop the lock taken by uct_ud_enter() before bailing out */
        uct_ud_leave(&iface->super);
        return status;
    }

    ep = ucs_derived_of(new_ud_ep, uct_ud_verbs_ep_t);
    *new_ep_p = &ep->super.super.super;
    if (skb == NULL) {
        uct_ud_leave(&iface->super);
        return UCS_OK;
    }

    ucs_assert_always(ep->ah == NULL);
    ah = uct_ib_create_ah(&iface->super.super, if_addr->lid);
    if (ah == NULL) {
        ucs_error("failed to create address handle: %m");
        status = UCS_ERR_INVALID_ADDR;
        goto err;
    }
    ep->ah = ah;

    ucs_trace_data("TX: CREQ (qp=%x lid=%d)", if_addr->qp_num, if_addr->lid);
    uct_ud_verbs_ep_tx_skb(iface, ep, skb, IBV_SEND_INLINE);
    uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb);
    uct_ud_leave(&iface->super);
    return UCS_OK;

err:
    uct_ud_ep_destroy_connected(&ep->super, if_addr);
    uct_ud_leave(&iface->super);
    *new_ep_p = NULL;
    return status;
}
static void ucp_ep_remote_connected(ucp_ep_h ep)
{
    ucp_worker_h worker          = ep->worker;
    uct_iface_attr_t *iface_attr = &worker->iface_attrs[ep->uct.rsc_index];

    ucs_debug("connected 0x%"PRIx64"->0x%"PRIx64, worker->uuid, ep->dest_uuid);

    ep->config.max_short_tag = iface_attr->cap.am.max_short - sizeof(uint64_t);
    ep->config.max_short_put = iface_attr->cap.put.max_short;
    ep->config.max_bcopy_put = iface_attr->cap.put.max_bcopy;
    ep->config.max_bcopy_get = iface_attr->cap.get.max_bcopy;

    /* Synchronize with other threads */
    ucs_memory_cpu_store_fence();

    ucs_assert_always(ep->state & UCP_EP_STATE_LOCAL_CONNECTED);
    ep->state |= UCP_EP_STATE_REMOTE_CONNECTED;
}
ucs_status_t uct_ud_mlx5_iface_get_av(uct_ib_iface_t *iface,
                                      uct_ud_mlx5_iface_common_t *ud_common_iface,
                                      const uct_ib_address_t *ib_addr,
                                      uint8_t path_bits,
                                      uct_ib_mlx5_base_av_t *base_av,
                                      struct mlx5_grh_av *grh_av,
                                      int *is_global)
{
    ucs_status_t status;
    struct ibv_ah *ah;
    struct mlx5_wqe_av mlx5_av;

    status = uct_ib_iface_create_ah(iface, ib_addr, path_bits, &ah, is_global);
    if (status != UCS_OK) {
        return UCS_ERR_INVALID_ADDR;
    }

    uct_ib_mlx5_get_av(ah, &mlx5_av);
    ibv_destroy_ah(ah);

    base_av->stat_rate_sl = mlx5_av_base(&mlx5_av)->stat_rate_sl;
    base_av->fl_mlid      = mlx5_av_base(&mlx5_av)->fl_mlid;
    base_av->rlid         = mlx5_av_base(&mlx5_av)->rlid;

    /* copy MLX5_EXTENDED_UD_AV from the driver; if the flag is not present,
     * then the device supports a compact address vector. */
    if (ud_common_iface->config.compact_av) {
        base_av->dqp_dct = mlx5_av_base(&mlx5_av)->dqp_dct &
                           UCT_IB_MLX5_EXTENDED_UD_AV;
    } else {
        base_av->dqp_dct = UCT_IB_MLX5_EXTENDED_UD_AV;
    }

    ucs_assertv_always((UCT_IB_MLX5_AV_FULL_SIZE > UCT_IB_MLX5_AV_BASE_SIZE) ||
                       (base_av->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV),
                       "compact address vector not supported, and EXTENDED_AV flag is missing");

    if (*is_global) {
        ucs_assert_always(grh_av != NULL);
        memcpy(grh_av, mlx5_av_grh(&mlx5_av), sizeof(*grh_av));
    }
    return UCS_OK;
}
/* increment the reference count and return the handler */
static ucs_async_handler_t *ucs_async_handler_get(int id)
{
    ucs_async_handler_t *handler;
    khiter_t hash_it;

    pthread_rwlock_rdlock(&ucs_async_global_context.handlers_lock);
    hash_it = ucs_async_handler_kh_get(id);
    if (ucs_async_handler_kh_is_end(hash_it)) {
        handler = NULL;
        goto out_unlock;
    }

    handler = kh_value(&ucs_async_global_context.handlers, hash_it);
    ucs_assert_always(handler->id == id);
    ucs_async_handler_hold(handler);

out_unlock:
    pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock);
    return handler;
}
/* remove from hash and return the handler */
static ucs_async_handler_t *ucs_async_handler_extract(int id)
{
    ucs_async_handler_t *handler;
    khiter_t hash_it;

    pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock);
    hash_it = ucs_async_handler_kh_get(id);
    if (ucs_async_handler_kh_is_end(hash_it)) {
        ucs_debug("async handler [id=%d] not found in hash table", id);
        handler = NULL;
    } else {
        handler = kh_value(&ucs_async_global_context.handlers, hash_it);
        ucs_assert_always(handler->id == id);
        kh_del(ucs_async_handler, &ucs_async_global_context.handlers, hash_it);
        ucs_debug("removed async handler " UCS_ASYNC_HANDLER_FMT " from hash",
                  UCS_ASYNC_HANDLER_ARG(handler));
    }
    pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock);
    return handler;
}