/**
 * Arbiter callback which progresses a single pending request queued on a
 * uGNI endpoint.
 *
 * The endpoint's arb_sched flag is set to 1 while the request callback runs
 * and reset to 0 as soon as it returns.
 *
 * @param arbiter  Arbiter invoking the callback (not used here).
 * @param elem     Arbiter element embedded in the pending request.
 * @param arg      Dispatch argument (not used here).
 *
 * @return UCS_ARBITER_CB_RESULT_REMOVE_ELEM if the request returned UCS_OK,
 *         UCS_ARBITER_CB_RESULT_NEXT_GROUP if it returned UCS_INPROGRESS,
 *         UCS_ARBITER_CB_RESULT_RESCHED_GROUP for any other status.
 */
ucs_arbiter_cb_result_t uct_ugni_ep_process_pending(ucs_arbiter_t *arbiter,
                                                    ucs_arbiter_elem_t *elem,
                                                    void *arg)
{
    uct_pending_req_t *pending = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_ugni_ep_t *owner       = ucs_container_of(ucs_arbiter_elem_group(elem),
                                                  uct_ugni_ep_t, arb_group);
    ucs_status_t status;

    owner->arb_sched = 1;
    ucs_trace_data("progressing pending request %p", pending);
    status = pending->func(pending);
    owner->arb_sched = 0;
    ucs_trace_data("status returned from progress pending: %s",
                   ucs_status_string(status));

    switch (status) {
    case UCS_OK:
        /* The request is done; drop it from the arbiter */
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    case UCS_INPROGRESS:
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    default:
        /* Nothing could be sent; the element stays queued and is retried on
         * the next dispatch */
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }
}
/**
 * Event-pipe handler which consumes all datagrams currently reported by the
 * uGNI datagram probe.
 *
 * The function probes with GNI_PostDataProbeById() (under the device lock)
 * and keeps looping while the probe returns GNI_RC_SUCCESS. For every probed
 * id it calls recieve_datagram(); only a UCS_INPROGRESS result is processed:
 *  - if an endpoint was returned, the endpoint's posted descriptor is
 *    processed as a reply;
 *  - otherwise the interface's wildcard descriptor is processed and the
 *    wildcard receive is posted again.
 * When the probe stops succeeding, the event pipe is drained, events_ready
 * is cleared under device_lock and device_condition is signaled.
 *
 * @param event_id  Event identifier (not used here).
 * @param arg       Pointer to the uct_ugni_udt_iface_t owning the pipe.
 */
void uct_ugni_proccess_datagram_pipe(int event_id, void *arg) {
    uct_ugni_udt_iface_t *iface = (uct_ugni_udt_iface_t *)arg;
    uct_ugni_udt_ep_t *ep;
    uct_ugni_udt_desc_t *datagram;
    ucs_status_t status;
    void *user_desc;
    gni_return_t ugni_rc;
    uint64_t id;
    ucs_trace_func("");
    /* Initial probe; the same probe is repeated at the end of every loop
     * iteration */
    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id);
    uct_ugni_device_unlock(&iface->super.cdm);
    while (GNI_RC_SUCCESS == ugni_rc) {
        status = recieve_datagram(iface, id, &ep);
        if (UCS_INPROGRESS == status) {
            if (ep != NULL){
                ucs_trace_data("Processing reply");
                datagram = ep->posted_desc;
                status = processs_datagram(iface, datagram);
                if (UCS_OK != status) {
                    /* Descriptor is not returned to the pool here; instead
                     * its receive descriptor is pointed at release_desc */
                    user_desc = uct_ugni_udt_get_user_desc(datagram, iface);
                    uct_recv_desc(user_desc) = &iface->release_desc;
                } else {
                    ucs_mpool_put(datagram);
                }
                ep->posted_desc = NULL;
                uct_ugni_check_flush(ep->desc_flush_group);
            } else {
                ucs_trace_data("Processing wildcard");
                datagram = iface->desc_any;
                status = processs_datagram(iface, datagram);
                if (UCS_OK != status) {
                    /* Fetch a replacement wildcard descriptor; desc_any is
                     * set to NULL by the macro's failure action */
                    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, iface->desc_any, iface->desc_any=NULL);
                    user_desc = uct_ugni_udt_get_user_desc(datagram, iface);
                    uct_recv_desc(user_desc) = &iface->release_desc;
                }
                status = uct_ugni_udt_ep_any_post(iface);
                if (UCS_OK != status) {
                    /* We can't continue if we can't post the first receive */
                    /* NOTE(review): this return skips the pipe drain and the
                     * condition signal below - confirm that is intended */
                    ucs_error("Failed to post wildcard request");
                    return;
                }
            }
        }
        uct_ugni_device_lock(&iface->super.cdm);
        ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id);
        uct_ugni_device_unlock(&iface->super.cdm);
    }
    ucs_async_pipe_drain(&iface->event_pipe);
    pthread_mutex_lock(&iface->device_lock);
    iface->events_ready = 0;
    pthread_mutex_unlock(&iface->device_lock);
    ucs_trace("Signaling device thread to resume monitoring");
    pthread_cond_signal(&iface->device_condition);
}
/**
 * Emit a data-level trace line describing a wireup message.
 *
 * Only CONN_REQ, CONN_REP and CONN_ACK active-message ids are logged; any
 * other id makes the function return without logging.
 *
 * @param worker   Worker whose context supplies the resource tables.
 * @param am_id    Active-message id of the wireup message.
 * @param msg      Wireup message; a struct sockaddr is read right after it.
 * @param is_send  Non-zero to log as "TX", zero to log as "RX".
 */
static void ucp_wireup_log(ucp_worker_h worker, uint8_t am_id,
                           ucp_wireup_msg_t *msg, int is_send)
{
    ucp_context_h context = worker->context;
    const char *msg_type;

    if (am_id == UCP_AM_ID_CONN_REQ) {
        msg_type = "CONN_REQ";
    } else if (am_id == UCP_AM_ID_CONN_REP) {
        msg_type = "CONN_REP";
    } else if (am_id == UCP_AM_ID_CONN_ACK) {
        msg_type = "CONN_ACK";
    } else {
        /* Not a wireup message type we know how to describe */
        return;
    }

    if (!is_send) {
        /* Receive side: the source is shown as raw indices, the destination
         * as a resolved local resource */
        ucs_trace_data("RX: %s [uuid 0x%"PRIx64" from %d pd %d to "UCT_TL_RESOURCE_DESC_FMT" af %d]",
                       msg_type, msg->src_uuid, msg->src_rsc_index,
                       msg->src_pd_index,
                       UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[msg->dst_rsc_index].tl_rsc),
                       ((struct sockaddr*)(msg + 1))->sa_family);
        return;
    }

    /* Send side: the source is shown as a resolved local resource and pd name */
    ucs_trace_data("TX: %s [uuid 0x%"PRIx64" from "UCT_TL_RESOURCE_DESC_FMT" pd %s to %d af %d]",
                   msg_type, msg->src_uuid,
                   UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[msg->src_rsc_index].tl_rsc),
                   context->pd_rscs[msg->src_pd_index].pd_name,
                   msg->dst_rsc_index,
                   ((struct sockaddr*)(msg + 1))->sa_family);
}
/**
 * Poll the receive CQ of a UD verbs interface and hand every received packet
 * to the UD endpoint layer.
 *
 * Up to UCT_IB_MAX_WC completions are polled in one call. A polling failure
 * or a completion with a non-success status is fatal. After the completions
 * are processed, their count is added to rx.available and receives are
 * re-posted.
 *
 * @param iface  UD verbs interface to poll.
 *
 * @return UCS_ERR_NO_PROGRESS if the CQ was empty, UCS_OK otherwise.
 */
static inline ucs_status_t uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface)
{
    struct ibv_wc completions[UCT_IB_MAX_WC];
    struct ibv_wc *cqe;
    uct_ib_iface_recv_desc_t *desc;
    char *packet;
    int num_wc;
    int idx;

    num_wc = ibv_poll_cq(iface->super.super.recv_cq, UCT_IB_MAX_WC, completions);
    if (num_wc == 0) {
        return UCS_ERR_NO_PROGRESS;
    }

    if (ucs_unlikely(num_wc < 0)) {
        ucs_fatal("Failed to poll receive CQ");
    }

    for (idx = 0; idx < num_wc; ++idx) {
        cqe = &completions[idx];
        if (ucs_unlikely(cqe->status != IBV_WC_SUCCESS)) {
            ucs_fatal("Receive completion with error: %s",
                      ibv_wc_status_str(cqe->status));
        }

        /* The work request id carries the receive descriptor pointer */
        desc = (void*)cqe->wr_id;
        ucs_trace_data("pkt rcvd: buf=%p len=%d", desc, cqe->byte_len);

        packet = uct_ib_iface_recv_desc_hdr(&iface->super.super, desc);
        VALGRIND_MAKE_MEM_DEFINED(packet, cqe->byte_len);

        /* The UD network header starts UCT_IB_GRH_LEN bytes into the packet */
        uct_ud_ep_process_rx(&iface->super,
                             (uct_ud_neth_t *)(packet + UCT_IB_GRH_LEN),
                             cqe->byte_len - UCT_IB_GRH_LEN,
                             (uct_ud_recv_skb_t *)desc);
    }

    iface->super.rx.available += num_wc;
    uct_ud_verbs_iface_post_recv(iface);
    return UCS_OK;
}
/**
 * Send a short active message inline over a UD verbs endpoint.
 *
 * The total of network header, 64-bit AM header and payload is validated
 * against the interface's max_inline BEFORE the common AM setup is run, so
 * an oversized request is rejected without first obtaining an skb.
 *
 * @param tl_ep   Endpoint to send on.
 * @param id      Active-message id.
 * @param hdr     64-bit header placed in front of the payload.
 * @param buffer  Payload to send inline.
 * @param length  Payload length in bytes.
 *
 * @return UCS_OK on success, the status of uct_ud_verbs_am_common() if it
 *         fails, or whatever UCT_CHECK_LENGTH returns on a length violation.
 */
static ucs_status_t uct_ud_verbs_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                             uint64_t hdr, const void *buffer,
                                             unsigned length)
{
    uct_ud_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t);
    uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_ud_verbs_iface_t);
    uct_ud_send_skb_t *skb;
    uct_ud_am_short_hdr_t *am_hdr;
    ucs_status_t status;

    /* Validate parameters first: nothing below has run yet, so returning
     * here cannot leave a prepared skb behind */
    UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length,
                     iface->super.config.max_inline, "am_short");

    status = uct_ud_verbs_am_common(iface, ep, id, &skb);
    if (status != UCS_OK) {
        return status;
    }

    /* The AM short header is placed right after the network header */
    am_hdr = (uct_ud_am_short_hdr_t *)(skb->neth+1);
    am_hdr->hdr = hdr;
    iface->tx.sge[0].length = sizeof(uct_ud_neth_t) + sizeof(*am_hdr);

    uct_ud_verbs_iface_tx_inl(iface, ep, buffer, length);
    ucs_trace_data("TX: AM [%d] buf=%p len=%u", id, buffer, length);

    skb->len = iface->tx.sge[0].length;
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 am_hdr+1, buffer, length);
    return UCS_OK;
}
/**
 * Post a buffered-copy GET as a uGNI FMA operation.
 *
 * A zero length is short-circuited by UCT_SKIP_ZERO_LENGTH. The length,
 * aligned up to UGNI_GET_ALIGN, is checked against the interface's
 * fma_seg_size. A fetch descriptor is taken from free_desc_get_buffer and
 * formatted with uct_ugni_unalign_fma_get_cb as its completion callback,
 * carrying the caller's unpack callback and argument.
 *
 * @param tl_ep        Endpoint to read from.
 * @param unpack_cb    Callback stored in the descriptor for unpacking.
 * @param arg          Argument stored alongside unpack_cb.
 * @param length       Number of bytes to fetch.
 * @param remote_addr  Remote address to read.
 * @param rkey         Remote key.
 * @param comp         Completion stored in the descriptor.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         result of uct_ugni_post_fma() (called with UCS_INPROGRESS).
 */
ucs_status_t uct_ugni_ep_get_bcopy(uct_ep_h tl_ep,
                                   uct_unpack_callback_t unpack_cb,
                                   void *arg, size_t length,
                                   uint64_t remote_addr, uct_rkey_t rkey,
                                   uct_completion_t *comp)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0,
                     iface->config.fma_seg_size, "get_bcopy");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer,
                             fma, return UCS_ERR_NO_RESOURCE);

    uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET, remote_addr, rkey, length,
                            ep, comp, uct_ugni_unalign_fma_get_cb, unpack_cb,
                            arg);

    /* length is a size_t, so it is printed with %zu (was %lu, which only
     * matches where size_t happens to be unsigned long) */
    ucs_trace_data("Posting GET BCOPY, GNI_PostFma of size %"PRIx64" (%zu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, length,
                   (void *)fma->super.desc.local_addr,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, BCOPY, length);
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS);
}
/**
 * Post a buffered-copy PUT as a uGNI FMA operation.
 *
 * Because the caller supplies a pack callback, the data is first packed into
 * the buffer that follows a descriptor taken from free_desc_buffer, and the
 * FMA is posted from that buffer.
 *
 * @param tl_ep        Endpoint to write to.
 * @param pack_cb      Callback that packs the payload and returns its length.
 * @param arg          Argument passed to pack_cb.
 * @param remote_addr  Remote address to write.
 * @param rkey         Remote key.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         result of uct_ugni_post_fma() (called with the packed length).
 */
ssize_t uct_ugni_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                              void *arg, uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *desc;
    size_t packed_len;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_buffer,
                             desc, return UCS_ERR_NO_RESOURCE);

    /* The payload area starts immediately after the descriptor */
    packed_len = pack_cb(desc + 1, arg);
    UCT_SKIP_ZERO_LENGTH(packed_len, desc);

    /* NOTE(review): if this check returns, nothing visible here hands desc
     * back to its pool - confirm whether the macro can leak the descriptor */
    UCT_CHECK_LENGTH(packed_len, 0, iface->config.fma_seg_size, "put_bcopy");

    uct_ugni_format_fma(desc, GNI_POST_FMA_PUT, desc + 1, remote_addr, rkey,
                        packed_len, ep, NULL, NULL);

    ucs_trace_data("Posting PUT BCOPY, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   desc->desc.length,
                   (void *)desc->desc.local_addr,
                   (void *)desc->desc.remote_addr,
                   desc->desc.remote_mem_hndl.qword1,
                   desc->desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, BCOPY,
                      packed_len);
    return uct_ugni_post_fma(iface, ep, desc, packed_len);
}
/**
 * Send a buffered-copy active message over a UD verbs endpoint.
 *
 * The network header plus payload length is checked against a fixed 4096
 * limit, then the common AM setup supplies an skb. The payload is packed
 * right after the network header and the skb is transmitted.
 *
 * @param tl_ep    Endpoint to send on.
 * @param id       Active-message id.
 * @param pack_cb  Callback that packs the payload into the skb.
 * @param arg      Argument passed to pack_cb.
 * @param length   Payload length in bytes.
 *
 * @return UCS_OK on success or the status of uct_ud_verbs_am_common().
 */
static ucs_status_t uct_ud_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                             uct_pack_callback_t pack_cb,
                                             void *arg, size_t length)
{
    uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_ud_verbs_iface_t);
    uct_ud_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t);
    uct_ud_send_skb_t *skb;
    ucs_status_t status;
    char *payload;

    UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + length, 4096 /* TODO */,
                     "am_bcopy");

    status = uct_ud_verbs_am_common(iface, ep, id, &skb);
    if (status != UCS_OK) {
        return status;
    }

    /* Pack the user data directly behind the network header */
    payload = (char *)(skb->neth+1);
    pack_cb(payload, arg, length);

    /* Describe the whole packet (header + payload) in the first SGE */
    iface->tx.sge[0].length = sizeof(uct_ud_neth_t) + length;
    iface->tx.sge[0].lkey   = skb->lkey;
    skb->len                = iface->tx.sge[0].length;

    uct_ud_verbs_iface_tx_data(iface, ep);
    ucs_trace_data("TX(iface=%p): AM_BCOPY [%d] skb=%p buf=%p len=%u", iface,
                   id, skb, arg, (int)length);
    uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb);
    return UCS_OK;
}
/**
 * Post a zero-copy PUT as a uGNI RDMA operation.
 *
 * UCT_CHECK_PARAM_IOV supplies the buffer, length and memh used below from
 * the iov argument. A zero length is short-circuited by
 * UCT_SKIP_ZERO_LENGTH and the length is checked against rdma_max_size.
 *
 * @param tl_ep        Endpoint to write to.
 * @param iov          I/O vector describing the local data.
 * @param iovcnt       Number of entries in iov.
 * @param remote_addr  Remote address to write.
 * @param rkey         Remote key.
 * @param comp         Completion stored in the descriptor.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         result of uct_ugni_post_rdma().
 */
ucs_status_t uct_ugni_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                   size_t iovcnt, uint64_t remote_addr,
                                   uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *desc;

    UCT_CHECK_PARAM_IOV(iov, iovcnt, buffer, length, memh);
    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(length, 0, iface->config.rdma_max_size, "put_zcopy");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, desc,
                             return UCS_ERR_NO_RESOURCE);

    /* Fill in the RDMA descriptor, including the completion to signal */
    uct_ugni_format_rdma(desc, GNI_POST_RDMA_PUT, buffer, remote_addr, memh,
                         rkey, length, ep, iface->super.local_cq, comp);

    ucs_trace_data("Posting PUT ZCOPY, GNI_PostRdma of size %"PRIx64" from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   desc->desc.length,
                   (void *)desc->desc.local_addr,
                   (void *)desc->desc.remote_addr,
                   desc->desc.remote_mem_hndl.qword1,
                   desc->desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY, length);
    return uct_ugni_post_rdma(iface, ep, desc);
}
/**
 * Send a buffered-copy active message over a UD verbs endpoint.
 *
 * The whole operation runs between uct_ud_enter() and uct_ud_leave().
 * Pending transmissions are progressed first, then the common AM setup
 * supplies an skb into which the payload is packed.
 *
 * @param tl_ep    Endpoint to send on.
 * @param id       Active-message id.
 * @param pack_cb  Callback that packs the payload into the skb.
 * @param arg      Argument passed to pack_cb.
 *
 * @return The length returned by uct_ud_skb_bcopy() on success, or the
 *         status of uct_ud_am_common() if it fails.
 */
static ssize_t uct_ud_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                        uct_pack_callback_t pack_cb, void *arg)
{
    uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_ud_verbs_iface_t);
    uct_ud_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t);
    uct_ud_send_skb_t *skb;
    ucs_status_t status;
    size_t length;
    ssize_t result;

    uct_ud_enter(&iface->super);
    uct_ud_iface_progress_pending_tx(&iface->super);

    status = uct_ud_am_common(&iface->super, &ep->super, id, &skb);
    if (status == UCS_OK) {
        length = uct_ud_skb_bcopy(skb, pack_cb, arg);
        uct_ud_verbs_ep_tx_skb(iface, ep, skb, 0);
        ucs_trace_data("TX(iface=%p): AM_BCOPY [%d] skb=%p buf=%p len=%u",
                       iface, id, skb, arg, skb->len);
        uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb);
        result = length;
    } else {
        /* Setup failed: report the status through the ssize_t return */
        result = status;
    }

    uct_ud_leave(&iface->super);
    return result;
}
/**
 * Try to send the not-yet-sent part of the endpoint's buffer on its socket.
 *
 * The bytes between ep->offset and ep->length are passed to uct_tcp_send().
 * The number of bytes it reports as sent is subtracted from the interface's
 * outstanding counter and added to ep->offset; once the whole buffer has
 * been sent, offset and length are both reset to zero.
 *
 * @param ep  TCP endpoint with data waiting to be sent.
 *
 * @return 1 if any bytes were sent, 0 if none were sent or the send
 *         returned a negative status.
 */
static unsigned uct_tcp_ep_send(uct_tcp_ep_t *ep)
{
    uct_tcp_iface_t *tcp_iface = ucs_derived_of(ep->super.super.iface,
                                                uct_tcp_iface_t);
    size_t send_length         = ep->length - ep->offset;
    ucs_status_t status;

    ucs_assert(send_length > 0);

    /* send_length is in/out: bytes requested on entry, bytes sent on return */
    status = uct_tcp_send(ep->fd, ep->buf + ep->offset, &send_length);
    if (status < 0) {
        return 0;
    }

    ucs_trace_data("tcp_ep %p: sent %zu bytes", ep, send_length);

    tcp_iface->outstanding -= send_length;
    ep->offset             += send_length;
    if (ep->offset == ep->length) {
        /* Buffer fully flushed; make it empty again */
        ep->offset = 0;
        ep->length = 0;
    }

    return send_length > 0;
}
/**
 * Send a buffered-copy active message over a uGNI SMSG endpoint.
 *
 * A descriptor is taken from the interface's free_desc pool. The SMSG header
 * sits right after the descriptor and the packed payload right after the
 * header; the header's length field is set to the packed size.
 *
 * @param tl_ep    Endpoint to send on.
 * @param id       Active-message id (validated with UCT_CHECK_AM_ID).
 * @param pack_cb  Callback that packs the payload and returns its size.
 * @param arg      Argument passed to pack_cb.
 *
 * @return The packed size if the common send returned UCS_OK, otherwise the
 *         status it returned; UCS_ERR_NO_RESOURCE if no descriptor is
 *         available.
 */
ssize_t uct_ugni_smsg_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                  uct_pack_callback_t pack_cb, void *arg)
{
    uct_ugni_smsg_ep_t *ep       = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t);
    uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_smsg_iface_t);
    uct_ugni_smsg_header_t *header;
    uct_ugni_smsg_desc_t *desc;
    ucs_status_t status;
    ssize_t packed_len;
    void *payload;

    UCT_CHECK_AM_ID(id);
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, desc,
                             return UCS_ERR_NO_RESOURCE);

    ucs_trace_data("AM_BCOPY [%p] am_id: %d buf=%p", iface, id, arg );

    /* Layout inside the descriptor buffer: [desc][smsg header][payload] */
    header  = (uct_ugni_smsg_header_t *)(desc+1);
    payload = (void*)(header+1);

    packed_len     = pack_cb(payload, arg);
    header->length = packed_len;

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, id,
                       payload, packed_len, "TX: AM_BCOPY");

    status = uct_ugni_smsg_ep_am_common_send(ep, iface, id,
                                             sizeof(uct_ugni_smsg_header_t),
                                             header, packed_len, payload, desc);
    if (UCS_OK != status) {
        return status;
    }

    return packed_len;
}
/**
 * Send a short active message over a uGNI SMSG endpoint.
 *
 * A descriptor is taken from the interface's free_desc pool. The SMSG header
 * sits right after the descriptor, followed by the 64-bit user header and
 * then a copy of the payload. The SMSG header's length field covers the user
 * header plus the payload.
 *
 * The maximum payload length is the segment size minus the SMSG header
 * structure and the 64-bit user header. The header structure is measured
 * with sizeof(*smsg_header); the previous sizeof(smsg_header) measured the
 * pointer instead of the structure that is actually written.
 *
 * @param tl_ep    Endpoint to send on.
 * @param id       Active-message id (validated with UCT_CHECK_AM_ID).
 * @param header   64-bit user header.
 * @param payload  Payload to copy into the message.
 * @param length   Payload length in bytes.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         result of uct_ugni_smsg_ep_am_common_send().
 */
ucs_status_t uct_ugni_smsg_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                       uint64_t header, const void *payload,
                                       unsigned length)
{
    uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_smsg_iface_t);
    uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t);
    uct_ugni_smsg_header_t *smsg_header;
    uint64_t *header_data;
    uct_ugni_smsg_desc_t *desc;

    UCT_CHECK_AM_ID(id);
    /* sizeof(*smsg_header) is unevaluated, so using the not-yet-assigned
     * pointer here is safe */
    UCT_CHECK_LENGTH(length,
                     iface->config.smsg_seg_size -
                     (sizeof(*smsg_header) + sizeof(header)),
                     "am_short");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, desc,
                             return UCS_ERR_NO_RESOURCE);

    ucs_trace_data("AM_SHORT [%p] am_id: %d buf=%p length=%u",
                   iface, id, payload, length);

    /* Layout inside the descriptor buffer:
     * [desc][smsg header][64-bit user header][payload] */
    smsg_header = (uct_ugni_smsg_header_t *)(desc+1);
    smsg_header->length = length + sizeof(header);

    header_data = (uint64_t*)(smsg_header+1);
    *header_data = header;
    memcpy((void*)(header_data+1), payload, length);

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, id,
                       header_data, length, "TX: AM_SHORT");

    return uct_ugni_smsg_ep_am_common_send(ep, iface, id,
                                           sizeof(uct_ugni_smsg_header_t),
                                           smsg_header, smsg_header->length,
                                           (void*)header_data, desc);
}
/**
 * Flush an RC mlx5 endpoint.
 *
 * The endpoint is considered flushed when tx.max_pi equals the value
 * calculated from tx.prev_sw_pi. Otherwise, if there are unsignaled
 * operations, a NOP is posted and UCS_INPROGRESS is returned.
 *
 * The flush statistic is counted only when the endpoint is actually found
 * flushed. It used to be bumped on the NOP-post failure path as well, which
 * counted a flush that had not happened.
 *
 * @param tl_ep  Endpoint to flush.
 *
 * @return UCS_OK if the endpoint is flushed, the status of the NOP post if
 *         it fails, UCS_INPROGRESS otherwise.
 */
ucs_status_t uct_rc_mlx5_ep_flush(uct_ep_h tl_ep)
{
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    ucs_status_t status;
    uint16_t exp_max_pi;

    /*
     * If we got completion for the last posted WQE, max_pi would be advanced
     * to the value calculated from prev_sw_pi - which is the index where the
     * last posted WQE started. See also uct_rc_mlx5_iface_poll_tx().
     */
    exp_max_pi = uct_rc_mlx5_calc_max_pi(iface, ep->tx.prev_sw_pi);
    if (ep->tx.max_pi == exp_max_pi) {
        UCT_TL_EP_STAT_FLUSH(&ep->super.super);
        ucs_trace_data("ep %p is flushed", ep);
        return UCS_OK;
    }

    if (ep->super.unsignaled != 0) {
        /* Post a NOP so that the outstanding unsignaled work is followed by
         * another WQE */
        status = uct_rc_mlx5_ep_inline_post(ep, MLX5_OPCODE_NOP, NULL, 0,
                                            0, 0, 0, 0);
        if (status != UCS_OK) {
            /* Nothing was flushed; do not touch the flush statistics */
            return status;
        }
    }

    return UCS_INPROGRESS;
}
/**
 * Arbiter callback which progresses a single pending request queued on an
 * MM endpoint.
 *
 * Before the request runs, the endpoint's cached tail is refreshed from the
 * FIFO control structure (after a load fence). If the endpoint still has no
 * TX resources, the request is not invoked.
 *
 * @param arbiter  Arbiter invoking the callback (not used here).
 * @param elem     Arbiter element embedded in the pending request.
 * @param arg      Dispatch argument (not used here).
 *
 * @return UCS_ARBITER_CB_RESULT_REMOVE_ELEM if the request returned UCS_OK,
 *         UCS_ARBITER_CB_RESULT_NEXT_GROUP if it returned UCS_INPROGRESS,
 *         UCS_ARBITER_CB_RESULT_RESCHED_GROUP if there are no TX resources
 *         or the request returned any other status.
 */
ucs_arbiter_cb_result_t uct_mm_ep_process_pending(ucs_arbiter_t *arbiter,
                                                  ucs_arbiter_elem_t *elem,
                                                  void *arg)
{
    uct_mm_ep_t *ep        = ucs_container_of(ucs_arbiter_elem_group(elem),
                                              uct_mm_ep_t, arb_group);
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    ucs_status_t status;

    /* Refresh the locally cached tail from the value published in the FIFO
     * control block so the pending send sees the up-to-date tail */
    ucs_memory_cpu_load_fence();
    ep->cached_tail = ep->fifo_ctl->tail;

    if (!uct_mm_ep_has_tx_resources(ep)) {
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }

    status = req->func(req);
    ucs_trace_data("progress pending request %p returned %s", req,
                   ucs_status_string(status));

    switch (status) {
    case UCS_OK:
        /* Request finished; take it off the arbiter */
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    case UCS_INPROGRESS:
        /* Request made progress but is not finished; leave it queued */
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    default:
        /* Request could not send; leave it queued for the next dispatch */
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }
}
/**
 * Receive data on a TCP endpoint and dispatch every complete active message
 * found in its buffer.
 *
 * New data is appended at ep->length, up to the interface's buf_size. Each
 * complete message (header plus hdr->length bytes) starting at ep->offset is
 * consumed; messages with an am_id of UCT_AM_ID_MAX or above are logged as
 * errors and skipped. Whatever is left is moved to the start of the buffer.
 *
 * If the receive reports UCS_ERR_CANCELED, the endpoint is removed from
 * EPOLLIN polling and destroyed.
 *
 * @param ep  TCP endpoint to progress.
 *
 * @return 1 if any bytes were received, 0 if none were or the receive did
 *         not return UCS_OK.
 */
unsigned uct_tcp_ep_progress_rx(uct_tcp_ep_t *ep)
{
    uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                            uct_tcp_iface_t);
    uct_tcp_am_hdr_t *hdr;
    ucs_status_t status;
    size_t recv_length;
    ssize_t remainder;

    ucs_trace_func("ep=%p", ep);

    /* Read as much as fits into the free tail of the buffer */
    recv_length = iface->config.buf_size - ep->length;
    status      = uct_tcp_recv(ep->fd, ep->buf + ep->length, &recv_length);
    if (status == UCS_ERR_CANCELED) {
        ucs_debug("tcp_ep %p: remote disconnected", ep);
        uct_tcp_ep_mod_events(ep, 0, EPOLLIN);
        uct_tcp_ep_destroy(&ep->super.super);
        return 0;
    }

    if (status != UCS_OK) {
        return 0;
    }

    ep->length += recv_length;
    ucs_trace_data("tcp_ep %p: recvd %zu bytes", ep, recv_length);

    /* Walk over every fully received message */
    while ((remainder = ep->length - ep->offset) >= sizeof(*hdr)) {
        hdr = ep->buf + ep->offset;
        if (remainder < sizeof(*hdr) + hdr->length) {
            /* Header is here but the payload is still incomplete */
            break;
        }

        /* Consume the message before dispatching it */
        ep->offset += sizeof(*hdr) + hdr->length;

        if (hdr->am_id < UCT_AM_ID_MAX) {
            uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV,
                               hdr->am_id, hdr + 1, hdr->length,
                               "RECV fd %d", ep->fd);
            uct_iface_invoke_am(&iface->super, hdr->am_id, hdr + 1,
                                hdr->length, 0);
        } else {
            ucs_error("invalid am id: %d", hdr->am_id);
        }
    }

    /* Shift the unconsumed bytes to the front of the buffer.
     * TODO avoid extra copy on partial receive */
    ucs_assert(remainder >= 0);
    memmove(ep->buf, ep->buf + ep->offset, remainder);
    ep->offset = 0;
    ep->length = remainder;

    return recv_length > 0;
}
/**
 * Trace the contents of an SA path record at data-trace level.
 *
 * The GIDs are rendered with inet_ntop(AF_INET6, ...) and the LIDs are
 * converted with ntohs(); every other field is printed as stored.
 *
 * @param path  Path record to dump.
 */
static void uct_cm_dump_path(struct ibv_sa_path_rec *path)
{
    char src_gid_str[256];
    char dst_gid_str[256];

    inet_ntop(AF_INET6, &path->sgid, src_gid_str, sizeof(src_gid_str));
    inet_ntop(AF_INET6, &path->dgid, dst_gid_str, sizeof(dst_gid_str));

    /* Addressing */
    ucs_trace_data("slid %d sgid %s dlid %d dgid %s",
                   ntohs(path->slid), src_gid_str,
                   ntohs(path->dlid), dst_gid_str);

    /* Routing and partition attributes */
    ucs_trace_data("traffic %d flow_label %d hop %d class %d revers. 0x%x "
                   "numb %d pkey 0x%x sl %d",
                   path->raw_traffic, path->flow_label, path->hop_limit,
                   path->traffic_class, path->reversible, path->numb_path,
                   path->pkey, path->sl);

    /* Values printed together with their selectors */
    ucs_trace_data("mtu %d(%d) rate %d(%d) lifetime %d(%d) pref %d",
                   path->mtu, path->mtu_selector,
                   path->rate, path->rate_selector,
                   path->packet_life_time, path->packet_life_time_selector,
                   path->preference);
}
/**
 * Short PUT for the ROCm copy transport, implemented as a plain memcpy()
 * from the local buffer to the address given by remote_addr.
 *
 * @param tl_ep        Endpoint (used for statistics only).
 * @param buffer       Source buffer.
 * @param length       Number of bytes to copy.
 * @param remote_addr  Destination address, used directly as a pointer.
 * @param rkey         Remote key (not used here).
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_rocm_copy_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                        unsigned length, uint64_t remote_addr,
                                        uct_rkey_t rkey)
{
    memcpy((void *)remote_addr, buffer, length);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length);
    /* length is unsigned, so it is printed with %u (was %d) */
    ucs_trace_data("PUT_SHORT size %u from %p to %p",
                   length, buffer, (void *)remote_addr);
    return UCS_OK;
}
/**
 * Buffered-copy PUT for the MM transport: the pack callback writes directly
 * to the address formed by rkey + remote_addr.
 *
 * @param tl_ep        Endpoint (not used here).
 * @param pack_cb      Callback that packs the data at the destination.
 * @param arg          Argument passed to pack_cb.
 * @param length       Number of bytes to pack; zero only produces a trace.
 * @param remote_addr  Remote address, added to rkey to form the destination.
 * @param rkey         Remote key, added to remote_addr.
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_mm_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                 void *arg, size_t length,
                                 uint64_t remote_addr, uct_rkey_t rkey)
{
    if (ucs_unlikely(length == 0)) {
        /* Nothing to pack */
        ucs_trace_data("PUT_BCOPY [zero-length]");
        return UCS_OK;
    }

    pack_cb((void *)(rkey + remote_addr), arg, length);
    uct_mm_trace_data(remote_addr, rkey, "PUT_BCOPY [size %zu]", length);
    return UCS_OK;
}
/**
 * Short GET for the ROCm copy transport, implemented as a plain memcpy()
 * from the address given by remote_addr into the local buffer.
 *
 * @param tl_ep        Endpoint (used for statistics only).
 * @param buffer       Destination buffer.
 * @param length       Number of bytes to copy.
 * @param remote_addr  Source address, used directly as a pointer.
 * @param rkey         Remote key (not used here).
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_rocm_copy_ep_get_short(uct_ep_h tl_ep, void *buffer,
                                        unsigned length, uint64_t remote_addr,
                                        uct_rkey_t rkey)
{
    /* device to host */
    memcpy(buffer, (void *)remote_addr, length);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, SHORT, length);
    /* length is unsigned, so it is printed with %u (was %d) */
    ucs_trace_data("GET_SHORT size %u from %p to %p",
                   length, (void *)remote_addr, buffer);
    return UCS_OK;
}
/**
 * Send a buffered-copy active message over a uGNI UDT endpoint.
 *
 * This is a thin wrapper: it traces the request and forwards it to
 * uct_ugni_udt_ep_am_common_send() in UCT_UGNI_UDT_AM_BCOPY mode, with no
 * short header or payload and with the caller's pack callback.
 *
 * @param tl_ep    Endpoint to send on.
 * @param id       Active-message id.
 * @param pack_cb  Callback that packs the payload.
 * @param arg      Argument passed to pack_cb.
 *
 * @return Whatever uct_ugni_udt_ep_am_common_send() returns.
 */
ssize_t uct_ugni_udt_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                 uct_pack_callback_t pack_cb, void *arg)
{
    uct_ugni_udt_ep_t *udt_ep       = ucs_derived_of(tl_ep, uct_ugni_udt_ep_t);
    uct_ugni_udt_iface_t *udt_iface = ucs_derived_of(tl_ep->iface,
                                                     uct_ugni_udt_iface_t);

    ucs_trace_data("AM_BCOPY [%p] am_id: %d buf=%p", udt_iface, id, arg );

    return uct_ugni_udt_ep_am_common_send(UCT_UGNI_UDT_AM_BCOPY, udt_ep,
                                          udt_iface, id, 0, 0, NULL,
                                          pack_cb, arg);
}
/**
 * Buffered-copy GET for the MM transport: the unpack callback reads directly
 * from the address formed by rkey + remote_addr.
 *
 * @param tl_ep        Endpoint (not used here).
 * @param unpack_cb    Callback that unpacks the data from the source.
 * @param arg          Argument passed to unpack_cb.
 * @param length       Number of bytes to unpack; zero only produces a trace.
 * @param remote_addr  Remote address, added to rkey to form the source.
 * @param rkey         Remote key, added to remote_addr.
 * @param comp         Completion (not used here).
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_mm_ep_get_bcopy(uct_ep_h tl_ep,
                                 uct_unpack_callback_t unpack_cb, void *arg,
                                 size_t length, uint64_t remote_addr,
                                 uct_rkey_t rkey, uct_completion_t *comp)
{
    if (ucs_unlikely(0 == length)) {
        /* Nothing to unpack */
        ucs_trace_data("GET_BCOPY [zero-length]");
        return UCS_OK;
    }

    unpack_cb(arg, (void *)(rkey + remote_addr), length);
    uct_mm_trace_data(remote_addr, rkey, "GET_BCOPY [length %zu]", length);
    return UCS_OK;
}
/**
 * Short PUT for the MM transport: a memcpy() from the local buffer to the
 * address formed by rkey + remote_addr.
 *
 * @param tl_ep        Endpoint (not used here).
 * @param buffer       Source buffer.
 * @param length       Number of bytes to copy; zero only produces a trace.
 * @param remote_addr  Remote address, added to rkey to form the destination.
 * @param rkey         Remote key, added to remote_addr.
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_mm_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                 unsigned length, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    if (ucs_unlikely(length == 0)) {
        /* Nothing to copy */
        ucs_trace_data("PUT_SHORT [zero-length]");
        return UCS_OK;
    }

    memcpy((void *)(rkey + remote_addr), buffer, length);
    uct_mm_trace_data(remote_addr, rkey, "PUT_SHORT [buffer %p size %u]",
                      buffer, length);
    return UCS_OK;
}
/**
 * Send a short active message over a uGNI UDT endpoint.
 *
 * The payload length is checked against the UDT segment size minus the
 * 64-bit user header and the UDT header structure; the request is then
 * forwarded to uct_ugni_udt_ep_am_common_send() in UCT_UGNI_UDT_AM_SHORT
 * mode, without a pack callback.
 *
 * @param tl_ep    Endpoint to send on.
 * @param id       Active-message id.
 * @param header   64-bit user header.
 * @param payload  Payload to send.
 * @param length   Payload length in bytes.
 *
 * @return Whatever uct_ugni_udt_ep_am_common_send() returns.
 */
ucs_status_t uct_ugni_udt_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                      uint64_t header, const void *payload,
                                      unsigned length)
{
    uct_ugni_udt_ep_t *udt_ep       = ucs_derived_of(tl_ep, uct_ugni_udt_ep_t);
    uct_ugni_udt_iface_t *udt_iface = ucs_derived_of(tl_ep->iface,
                                                     uct_ugni_udt_iface_t);

    UCT_CHECK_LENGTH(length,
                     udt_iface->config.udt_seg_size - sizeof(header) - sizeof(uct_ugni_udt_header_t),
                     "am_short");

    ucs_trace_data("AM_SHORT [%p] am_id: %d buf=%p length=%u",
                   udt_iface, id, payload, length);

    return uct_ugni_udt_ep_am_common_send(UCT_UGNI_UDT_AM_SHORT, udt_ep,
                                          udt_iface, id, length, header,
                                          payload, NULL, NULL);
}
/**
 * Short PUT for the shared-memory transport: a memcpy() from the local
 * buffer to the address formed by rkey + remote_addr.
 *
 * The PUT/SHORT statistic is updated for every call, including zero-length
 * ones.
 *
 * @param tl_ep        Endpoint (used for statistics only).
 * @param buffer       Source buffer.
 * @param length       Number of bytes to copy; zero skips the copy.
 * @param remote_addr  Remote address, added to rkey to form the destination.
 * @param rkey         Remote key, added to remote_addr.
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_sm_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                 unsigned length, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    if (ucs_unlikely(length == 0)) {
        /* Nothing to copy; only trace the request */
        ucs_trace_data("PUT_SHORT [zero-length]");
    } else {
        memcpy((void *)(rkey + remote_addr), buffer, length);
        uct_sm_ep_trace_data(remote_addr, rkey,
                             "PUT_SHORT [buffer %p size %u]", buffer, length);
    }

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length);
    return UCS_OK;
}
/**
 * Arbiter callback that dispatches a request waiting for TX resources on a
 * DC mlx5 endpoint.
 *
 * @param arbiter  Arbiter invoking the callback (not used here).
 * @param elem     Arbiter element embedded in the pending request.
 * @param arg      Dispatch argument (not used here).
 *
 * @return UCS_ARBITER_CB_RESULT_STOP if the interface has no TX resources
 *         (before or after running the request),
 *         UCS_ARBITER_CB_RESULT_REMOVE_ELEM if the request returned UCS_OK,
 *         UCS_ARBITER_CB_RESULT_NEXT_GROUP if it returned UCS_INPROGRESS,
 *         otherwise RESCHED_GROUP or DESCHED_GROUP when the endpoint cannot
 *         send (see the comment in the body).
 */
ucs_arbiter_cb_result_t
uct_dc_mlx5_iface_dci_do_pending_tx(ucs_arbiter_t *arbiter,
                                    ucs_arbiter_elem_t *elem, void *arg)
{
    uct_pending_req_t *req     = ucs_container_of(elem, uct_pending_req_t, priv);
    uct_dc_mlx5_ep_t *ep       = ucs_container_of(ucs_arbiter_elem_group(elem),
                                                  uct_dc_mlx5_ep_t, arb_group);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                                uct_dc_mlx5_iface_t);
    ucs_status_t status;

    if (!uct_rc_iface_has_tx_resources(&iface->super.super)) {
        return UCS_ARBITER_CB_RESULT_STOP;
    }

    status = req->func(req);
    ucs_trace_data("progress pending request %p returned: %s", req,
                   ucs_status_string(status));

    switch (status) {
    case UCS_OK:
        /* For dcs* policies release dci if this is the last elem in the group
         * and the dci has no outstanding operations. For example pending
         * callback did not send anything. (uct_ep_flush or just return ok) */
        if (ucs_arbiter_elem_is_last(&ep->arb_group, elem)) {
            uct_dc_mlx5_iface_dci_free(iface, ep);
        }
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    case UCS_INPROGRESS:
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    default:
        break;
    }

    if (uct_dc_mlx5_iface_dci_ep_can_send(ep)) {
        /* The endpoint itself can send, so the failure is expected to come
         * from the interface having run out of TX resources */
        ucs_assertv(!uct_rc_iface_has_tx_resources(&iface->super.super),
                    "pending callback returned error but send resources are available");
        return UCS_ARBITER_CB_RESULT_STOP;
    }

    /* Deschedule the group even if FC is the only resource, which
     * is missing. It will be scheduled again when credits arrive.
     * We can't desched group with rand policy if non FC resources are
     * missing, since it's never scheduled again. */
    return (uct_dc_mlx5_iface_is_dci_rand(iface) &&
            uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) ?
           UCS_ARBITER_CB_RESULT_RESCHED_GROUP :
           UCS_ARBITER_CB_RESULT_DESCHED_GROUP;
}
/**
 * Create a UD verbs endpoint connected to the given interface address and,
 * when the common layer supplies an skb, send the connection request (CREQ).
 *
 * The whole operation runs between uct_ud_enter() and uct_ud_leave(); every
 * return path, including the early failure of the common create step, now
 * calls uct_ud_leave() (that early path used to return without it).
 *
 * @param iface_h   Interface on which to create the endpoint.
 * @param addr      Remote address, interpreted as uct_sockaddr_ib_t.
 * @param new_ep_p  Filled with the new endpoint on success; set to NULL if
 *                  address-handle creation fails.
 *
 * @return UCS_OK on success, the status of the common create step if it
 *         fails, or UCS_ERR_INVALID_ADDR if the address handle cannot be
 *         created.
 */
ucs_status_t uct_ud_verbs_ep_create_connected(uct_iface_h iface_h,
                                              const struct sockaddr *addr,
                                              uct_ep_h *new_ep_p)
{
    uct_ud_verbs_iface_t *iface = ucs_derived_of(iface_h, uct_ud_verbs_iface_t);
    uct_ud_verbs_ep_t *ep;
    uct_ud_ep_t *new_ud_ep;
    const uct_sockaddr_ib_t *if_addr = (const uct_sockaddr_ib_t *)addr;
    uct_ud_send_skb_t *skb;
    struct ibv_ah *ah;
    ucs_status_t status;

    uct_ud_enter(&iface->super);

    status = uct_ud_ep_create_connected_common(&iface->super, if_addr,
                                               &new_ud_ep, &skb);
    if (status != UCS_OK) {
        /* Balance the uct_ud_enter() above before bailing out */
        uct_ud_leave(&iface->super);
        return status;
    }

    ep = ucs_derived_of(new_ud_ep, uct_ud_verbs_ep_t);
    *new_ep_p = &ep->super.super.super;

    if (skb == NULL) {
        /* No skb was supplied, so there is no CREQ to send */
        uct_ud_leave(&iface->super);
        return UCS_OK;
    }

    ucs_assert_always(ep->ah == NULL);
    ah = uct_ib_create_ah(&iface->super.super, if_addr->lid);
    if (ah == NULL) {
        ucs_error("failed to create address handle: %m");
        status = UCS_ERR_INVALID_ADDR;
        goto err;
    }
    ep->ah = ah;

    ucs_trace_data("TX: CREQ (qp=%x lid=%d)", if_addr->qp_num, if_addr->lid);
    uct_ud_verbs_ep_tx_skb(iface, ep, skb, IBV_SEND_INLINE);
    uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb);
    uct_ud_leave(&iface->super);
    return UCS_OK;

err:
    uct_ud_ep_destroy_connected(&ep->super, if_addr);
    uct_ud_leave(&iface->super);
    *new_ep_p = NULL;
    return status;
}
/**
 * Create a UD mlx5 endpoint connected to the given interface address and,
 * when the common layer supplies an skb, send the connection request (CREQ).
 *
 * The whole operation runs between uct_ud_enter() and uct_ud_leave().
 *
 * @param iface_h   Interface on which to create the endpoint.
 * @param addr      Remote address, interpreted as uct_sockaddr_ib_t.
 * @param new_ep_p  Filled with the new endpoint on success; set to NULL if
 *                  address-handle creation fails.
 *
 * @return UCS_OK on success, otherwise the status of the failing step
 *         (common create or address-handle creation).
 */
ucs_status_t uct_ud_mlx5_ep_create_connected(uct_iface_h iface_h,
                                             const struct sockaddr *addr,
                                             uct_ep_h *new_ep_p)
{
    const uct_sockaddr_ib_t *if_addr = (const uct_sockaddr_ib_t *)addr;
    uct_ud_mlx5_iface_t *iface       = ucs_derived_of(iface_h,
                                                      uct_ud_mlx5_iface_t);
    uct_ud_ep_t *common_ep;
    uct_ud_mlx5_ep_t *ep;
    uct_ud_send_skb_t *skb;
    ucs_status_t status;

    uct_ud_enter(&iface->super);

    status = uct_ud_ep_create_connected_common(&iface->super, if_addr,
                                               &common_ep, &skb);
    if (status != UCS_OK) {
        uct_ud_leave(&iface->super);
        return status;
    }

    ep        = ucs_derived_of(common_ep, uct_ud_mlx5_ep_t);
    *new_ep_p = &ep->super.super.super;

    if (skb != NULL) {
        /* An skb was supplied: set up the address handle and send the CREQ */
        status = uct_ud_mlx5_ep_create_ah(iface, ep, if_addr);
        if (status != UCS_OK) {
            /* Roll back the endpoint and report the failure */
            uct_ud_ep_destroy_connected(&ep->super, if_addr);
            uct_ud_leave(&iface->super);
            *new_ep_p = NULL;
            return status;
        }

        ucs_trace_data("TX: CREQ (qp=%x lid=%d)", if_addr->qp_num,
                       if_addr->lid);
        uct_ud_mlx5_ep_tx_skb(iface, ep, skb);
        uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb);
    }

    uct_ud_leave(&iface->super);
    return UCS_OK;
}
/**
 * Send a short PUT inline over a UD verbs endpoint.
 *
 * The network header is initialized as a PUT with an ACK request, the PUT
 * header carrying the remote virtual address follows it, and the payload is
 * sent inline.
 *
 * The TX skb is obtained exactly once; the acquisition and its NULL check
 * used to be duplicated back to back.
 *
 * @param tl_ep        Endpoint to send on.
 * @param buffer       Payload to send inline.
 * @param length       Payload length in bytes.
 * @param remote_addr  Remote virtual address stored in the PUT header.
 * @param rkey         Remote key (not used here).
 *
 * @return UCS_OK on success, UCS_ERR_NO_RESOURCE if the endpoint is not
 *         connected or no TX skb is available.
 */
static ucs_status_t uct_ud_verbs_ep_put_short(uct_ep_h tl_ep,
                                              const void *buffer,
                                              unsigned length,
                                              uint64_t remote_addr,
                                              uct_rkey_t rkey)
{
    uct_ud_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t);
    uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_ud_verbs_iface_t);
    uct_ud_send_skb_t *skb;
    uct_ud_put_hdr_t *put_hdr;
    uct_ud_neth_t *neth;

    if (!uct_ud_ep_is_connected(&ep->super)) {
        return UCS_ERR_NO_RESOURCE;
    }

    /* TODO: UCT_CHECK_LENGTH(length <= iface->config.max_inline, "put_short"); */
    skb = uct_ud_iface_get_tx_skb(&iface->super, &ep->super);
    if (!skb) {
        return UCS_ERR_NO_RESOURCE;
    }

    neth = skb->neth;
    uct_ud_neth_init_data(&ep->super, neth);
    uct_ud_neth_set_type_put(&ep->super, neth);
    uct_ud_neth_ack_req(&ep->super, neth);

    /* The PUT header is placed right after the network header */
    put_hdr = (uct_ud_put_hdr_t *)(neth+1);
    put_hdr->rva = remote_addr;

    iface->tx.sge[0].addr   = (uintptr_t)neth;
    iface->tx.sge[0].length = sizeof(*neth) + sizeof(*put_hdr);

    uct_ud_verbs_iface_tx_inl(iface, ep, buffer, length);
    ucs_trace_data("TX: PUT [%0llx] buf=%p len=%u",
                   (unsigned long long)remote_addr, buffer, length);

    skb->len = iface->tx.sge[0].length;
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 put_hdr+1, buffer, length);
    return UCS_OK;
}
/**
 * Handle an incoming SIDR request on a CM interface.
 *
 * The request's private data is interpreted as a uct_cm_hdr_t followed by
 * hdr->length bytes of payload. A temporary receive descriptor is allocated,
 * a SIDR reply with IB_SIDR_SUCCESS is sent (a send failure is only logged),
 * and the active-message handler is invoked on the payload. The temporary
 * descriptor is freed here only when the handler returns UCS_OK.
 *
 * Changes from the previous version:
 *  - the Valgrind annotation for the header now covers sizeof(*hdr), the
 *    header structure, instead of sizeof(hdr), the size of a pointer;
 *  - the descriptor offset is computed on a char pointer instead of relying
 *    on arithmetic on void *, which is a compiler extension.
 *
 * @param iface  CM interface that received the event.
 * @param event  CM event carrying the SIDR request.
 */
static void uct_cm_iface_handle_sidr_req(uct_cm_iface_t *iface,
                                         struct ib_cm_event *event)
{
    uct_cm_hdr_t *hdr = event->private_data;
    struct ib_cm_sidr_rep_param rep;
    ucs_status_t status;
    void *cm_desc, *desc;
    int ret;

    VALGRIND_MAKE_MEM_DEFINED(hdr, sizeof(*hdr));
    VALGRIND_MAKE_MEM_DEFINED(hdr + 1, hdr->length);

    uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_RECV, hdr, "RX: SIDR_REQ");

    /* Allocate temporary buffer to serve as receive descriptor */
    cm_desc = ucs_malloc(iface->super.config.rx_payload_offset + hdr->length,
                         "cm_recv_desc");
    if (cm_desc == NULL) {
        ucs_error("failed to allocate cm receive descriptor");
        return;
    }

    /* Send reply */
    ucs_trace_data("TX: SIDR_REP [id %p{%u}]", event->cm_id,
                   event->cm_id->handle);
    memset(&rep, 0, sizeof rep);
    rep.status = IB_SIDR_SUCCESS;
    ret = ib_cm_send_sidr_rep(event->cm_id, &rep);
    if (ret) {
        ucs_error("ib_cm_send_sidr_rep() failed: %m");
    }

    /* Call active message handler */
    desc = (char *)cm_desc + iface->super.config.rx_headroom_offset;
    uct_recv_desc_iface(desc) = &iface->super.super.super;
    status = uct_iface_invoke_am(&iface->super.super, hdr->am_id, hdr + 1,
                                 hdr->length, desc);
    if (status == UCS_OK) {
        /* Any other status leaves the descriptor allocated */
        ucs_free(cm_desc);
    }
}