/**
 * Buffered-copy GET over FMA.
 *
 * Takes a descriptor from iface->free_desc_get_buffer, formats it as a
 * GNI_POST_FMA_GET request with uct_ugni_unalign_fma_get_cb as the completion
 * handler (unpack_cb/arg are forwarded to the formatter), and posts it.
 *
 * @param tl_ep        Endpoint to post on.
 * @param unpack_cb    User unpack callback, forwarded to the descriptor.
 * @param arg          Opaque argument for unpack_cb.
 * @param length       Number of bytes requested.
 * @param remote_addr  Remote address to read from.
 * @param rkey         Remote key.
 * @param comp         Completion handle, forwarded to the descriptor.
 *
 * @return UCS_ERR_NO_RESOURCE when no descriptor is available; otherwise the
 *         value returned by uct_ugni_post_fma() (called with UCS_INPROGRESS).
 *         UCT_SKIP_ZERO_LENGTH / UCT_CHECK_LENGTH may return earlier - see
 *         their definitions.
 */
ucs_status_t uct_ugni_ep_get_bcopy(uct_ep_h tl_ep,
                                   uct_unpack_callback_t unpack_cb,
                                   void *arg, size_t length,
                                   uint64_t remote_addr, uct_rkey_t rkey,
                                   uct_completion_t *comp)
{
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_fetch_desc_t *fetch_desc;

    UCT_SKIP_ZERO_LENGTH(length);

    /* The length is validated after rounding it up to UGNI_GET_ALIGN */
    UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0,
                     rdma_iface->config.fma_seg_size, "get_bcopy");

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super,
                             &rdma_iface->free_desc_get_buffer,
                             fetch_desc, return UCS_ERR_NO_RESOURCE);

    uct_ugni_format_get_fma(fetch_desc, GNI_POST_FMA_GET, remote_addr, rkey,
                            length, ugni_ep, comp,
                            uct_ugni_unalign_fma_get_cb, unpack_cb, arg);

    ucs_trace_data("Posting GET BCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   fetch_desc->super.desc.length, length,
                   (void *)fetch_desc->super.desc.local_addr,
                   (void *)fetch_desc->super.desc.remote_addr,
                   fetch_desc->super.desc.remote_mem_hndl.qword1,
                   fetch_desc->super.desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, BCOPY, length);

    return uct_ugni_post_fma(rdma_iface, ugni_ep, &fetch_desc->super,
                             UCS_INPROGRESS);
}
/**
 * Zero-copy PUT over RDMA.
 *
 * Extracts the buffer, length and memory handle from the IOV via
 * UCT_CHECK_PARAM_IOV, takes a descriptor from iface->free_desc, formats it
 * as a GNI_POST_RDMA_PUT request on the interface's local CQ and posts it.
 *
 * @param tl_ep        Endpoint to post on.
 * @param iov          IOV describing the local data.
 * @param iovcnt       Number of entries in iov.
 * @param remote_addr  Remote address to write to.
 * @param rkey         Remote key.
 * @param comp         Completion handle, forwarded to the descriptor.
 *
 * @return UCS_ERR_NO_RESOURCE when no descriptor is available; otherwise the
 *         value returned by uct_ugni_post_rdma(). The UCT_CHECK_* and
 *         UCT_SKIP_ZERO_LENGTH macros may return earlier - see their
 *         definitions.
 */
ucs_status_t uct_ugni_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                   size_t iovcnt, uint64_t remote_addr,
                                   uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *put_desc;

    /* Declares and fills local_buf / data_len / local_memh from the IOV */
    UCT_CHECK_PARAM_IOV(iov, iovcnt, local_buf, data_len, local_memh);

    UCT_SKIP_ZERO_LENGTH(data_len);
    UCT_CHECK_LENGTH(data_len, 0, rdma_iface->config.rdma_max_size,
                     "put_zcopy");

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super, &rdma_iface->free_desc,
                             put_desc, return UCS_ERR_NO_RESOURCE);

    /* Fill in the RDMA post descriptor, including the completion */
    uct_ugni_format_rdma(put_desc, GNI_POST_RDMA_PUT, local_buf, remote_addr,
                         local_memh, rkey, data_len, ugni_ep,
                         rdma_iface->super.local_cq, comp);

    ucs_trace_data("Posting PUT ZCOPY, GNI_PostRdma of size %"PRIx64" from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   put_desc->desc.length,
                   (void *)put_desc->desc.local_addr,
                   (void *)put_desc->desc.remote_addr,
                   put_desc->desc.remote_mem_hndl.qword1,
                   put_desc->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY,
                      data_len);

    return uct_ugni_post_rdma(rdma_iface, ugni_ep, put_desc);
}
/**
 * Buffered-copy PUT over FMA.
 *
 * The user supplies a pack callback rather than a buffer, so the payload is
 * packed into the memory that immediately follows a descriptor taken from
 * iface->free_desc_buffer (i.e. at `desc + 1`), and that memory is then used
 * as the local source of a GNI_POST_FMA_PUT request.
 *
 * @param tl_ep        Endpoint to post on.
 * @param pack_cb      Callback that packs the payload; its return value is
 *                     used as the payload length.
 * @param arg          Opaque argument for pack_cb.
 * @param remote_addr  Remote address to write to.
 * @param rkey         Remote key.
 *
 * @return UCS_ERR_NO_RESOURCE when no descriptor is available; otherwise the
 *         value returned by uct_ugni_post_fma() (called with the packed
 *         length). UCT_SKIP_ZERO_LENGTH / UCT_CHECK_LENGTH may return
 *         earlier - see their definitions.
 */
ssize_t uct_ugni_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                              void *arg, uint64_t remote_addr,
                              uct_rkey_t rkey)
{
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *put_desc;
    size_t packed_len;

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super,
                             &rdma_iface->free_desc_buffer,
                             put_desc, return UCS_ERR_NO_RESOURCE);

    /* Pack the payload right behind the descriptor header */
    packed_len = pack_cb(put_desc + 1, arg);

    /* The descriptor is handed to the macro along with the length */
    UCT_SKIP_ZERO_LENGTH(packed_len, put_desc);

    /* NOTE(review): this check runs after the descriptor was taken and the
     * payload was packed; if it returns early the descriptor is presumably
     * not given back to the pool - confirm against the macro definition. */
    UCT_CHECK_LENGTH(packed_len, 0, rdma_iface->config.fma_seg_size,
                     "put_bcopy");

    uct_ugni_format_fma(put_desc, GNI_POST_FMA_PUT, put_desc + 1, remote_addr,
                        rkey, packed_len, ugni_ep, NULL, NULL);

    ucs_trace_data("Posting PUT BCOPY, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   put_desc->desc.length,
                   (void *)put_desc->desc.local_addr,
                   (void *)put_desc->desc.remote_addr,
                   put_desc->desc.remote_mem_hndl.qword1,
                   put_desc->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, BCOPY,
                      packed_len);

    return uct_ugni_post_fma(rdma_iface, ugni_ep, put_desc, packed_len);
}
/**
 * Common zero-copy RDMA path for the RC verbs endpoint.
 *
 * Builds a scatter/gather list from the IOV, fills a single send work
 * request with the given opcode, posts it with IBV_SEND_SIGNALED and
 * registers the completion against the endpoint's current txcnt.pi.
 *
 * @param ep           RC verbs endpoint.
 * @param iov          IOV describing the local data.
 * @param iovcnt       Number of entries in iov.
 * @param remote_addr  Remote address of the RDMA operation.
 * @param rkey         Remote key.
 * @param comp         Completion handle to register for this send.
 * @param opcode       Work-request opcode to store in the send WR.
 *
 * @return UCS_INPROGRESS once the request has been posted. UCT_RC_CHECK_RES
 *         and UCT_SKIP_ZERO_LENGTH may return earlier - see their
 *         definitions.
 */
static inline ucs_status_t
uct_rc_verbs_ep_rdma_zcopy(uct_rc_verbs_ep_t *ep, const uct_iov_t *iov,
                           size_t iovcnt, uint64_t remote_addr,
                           uct_rkey_t rkey, uct_completion_t *comp,
                           int opcode)
{
    uct_rc_verbs_iface_t *verbs_iface =
            ucs_derived_of(ep->super.super.super.iface, uct_rc_verbs_iface_t);
    struct ibv_send_wr send_wr;
    struct ibv_sge sg_entries[UCT_IB_MAX_IOV];
    size_t sge_count;

    UCT_RC_CHECK_RES(&verbs_iface->super, &ep->super);

    /* Translate the IOV into SGEs; the zero-length skip is keyed on the
     * value returned by the fill helper */
    sge_count = uct_ib_verbs_sge_fill_iov(sg_entries, iov, iovcnt);
    UCT_SKIP_ZERO_LENGTH(sge_count);

    UCT_RC_VERBS_FILL_RDMA_WR_IOV(send_wr, send_wr.opcode, opcode, sg_entries,
                                  sge_count, remote_addr, rkey);
    send_wr.next = NULL;

    uct_rc_verbs_ep_post_send(verbs_iface, ep, &send_wr, IBV_SEND_SIGNALED);
    uct_rc_txqp_add_send_comp(&verbs_iface->super, &ep->super.txqp, comp,
                              ep->txcnt.pi);

    return UCS_INPROGRESS;
}
/**
 * Zero-copy GET over RDMA.
 *
 * Extracts the buffer, length and memory handle from the IOV via
 * UCT_CHECK_PARAM_IOV. Requests that fail the UGNI_GET_ALIGN alignment test
 * are delegated to uct_ugni_ep_get_composed(); otherwise a descriptor is
 * taken from iface->free_desc, formatted as a GNI_POST_RDMA_GET request of
 * the aligned-up length, and posted.
 *
 * @param tl_ep        Endpoint to post on.
 * @param iov          IOV describing the local destination.
 * @param iovcnt       Number of entries in iov.
 * @param remote_addr  Remote address to read from.
 * @param rkey         Remote key.
 * @param comp         Completion handle, forwarded to the descriptor.
 *
 * @return The result of uct_ugni_ep_get_composed() on the unaligned path;
 *         UCS_ERR_NO_RESOURCE when no descriptor is available; otherwise the
 *         value returned by uct_ugni_post_rdma(). The UCT_CHECK_* and
 *         UCT_SKIP_ZERO_LENGTH macros may return earlier - see their
 *         definitions.
 */
ucs_status_t uct_ugni_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                   size_t iovcnt, uint64_t remote_addr,
                                   uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *get_desc;
    size_t aligned_len;
    int use_composed;

    /* Declares and fills local_buf / data_len / local_memh from the IOV */
    UCT_CHECK_PARAM_IOV(iov, iovcnt, local_buf, data_len, local_memh);

    UCT_SKIP_ZERO_LENGTH(data_len);

    /* Both the length check and the posted request use the rounded-up size */
    aligned_len = ucs_align_up_pow2(data_len, UGNI_GET_ALIGN);
    UCT_CHECK_LENGTH(aligned_len, 0, rdma_iface->config.rdma_max_size,
                     "get_zcopy");

    /* Unaligned data takes the composed path. The local buffer alignment is
     * only considered on Gemini devices; remote address and length are
     * always considered. */
    use_composed = ((GNI_DEVICE_GEMINI == rdma_iface->super.dev->type) &&
                    ucs_check_if_align_pow2((uintptr_t)local_buf,
                                            UGNI_GET_ALIGN)) ||
                   ucs_check_if_align_pow2(remote_addr, UGNI_GET_ALIGN) ||
                   ucs_check_if_align_pow2(data_len, UGNI_GET_ALIGN);
    if (ucs_unlikely(use_composed)) {
        return uct_ugni_ep_get_composed(tl_ep, local_buf, data_len,
                                        local_memh, remote_addr, rkey, comp);
    }

    /* Aligned case: a single RDMA GET is enough */
    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super, &rdma_iface->free_desc,
                             get_desc, return UCS_ERR_NO_RESOURCE);

    /* Fill in the RDMA post descriptor, including the completion */
    uct_ugni_format_rdma(get_desc, GNI_POST_RDMA_GET, local_buf, remote_addr,
                         local_memh, rkey, aligned_len, ugni_ep,
                         rdma_iface->super.local_cq, comp);

    ucs_trace_data("Posting GET ZCOPY, GNI_PostRdma of size %"PRIx64" (%lu) "
                   "from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   get_desc->desc.length, data_len,
                   (void *)get_desc->desc.local_addr,
                   (void *)get_desc->desc.remote_addr,
                   get_desc->desc.remote_mem_hndl.qword1,
                   get_desc->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY,
                      data_len);

    return uct_ugni_post_rdma(rdma_iface, ugni_ep, get_desc);
}
/**
 * Short PUT over FMA.
 *
 * Takes a descriptor from iface->free_desc, formats it as a GNI_POST_FMA_PUT
 * request that uses the caller's buffer as the local source, and posts it.
 *
 * @param tl_ep        Endpoint to post on.
 * @param buffer       Local data to send.
 * @param length       Number of bytes to send.
 * @param remote_addr  Remote address to write to.
 * @param rkey         Remote key.
 *
 * @return UCS_ERR_NO_RESOURCE when no descriptor is available; otherwise the
 *         value returned by uct_ugni_post_fma() (called with UCS_OK).
 *         UCT_SKIP_ZERO_LENGTH / UCT_CHECK_LENGTH may return earlier - see
 *         their definitions.
 */
ucs_status_t uct_ugni_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                   unsigned length, uint64_t remote_addr,
                                   uct_rkey_t rkey)
{
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *put_desc;

    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(length, 0, rdma_iface->config.fma_seg_size, "put_short");

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super, &rdma_iface->free_desc,
                             put_desc, return UCS_ERR_NO_RESOURCE);

    /* No completion or callback is attached to a short PUT */
    uct_ugni_format_fma(put_desc, GNI_POST_FMA_PUT, buffer, remote_addr, rkey,
                        length, ugni_ep, NULL, NULL);

    ucs_trace_data("Posting PUT Short, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   put_desc->desc.length,
                   (void *)put_desc->desc.local_addr,
                   (void *)put_desc->desc.remote_addr,
                   put_desc->desc.remote_mem_hndl.qword1,
                   put_desc->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT,
                      length);

    return uct_ugni_post_fma(rdma_iface, ugni_ep, put_desc, UCS_OK);
}