/**
 * Post a buffered-copy GET as a single FMA GET request.
 *
 * A fetch descriptor is taken from the interface's free_desc_get_buffer pool
 * and formatted with uct_ugni_unalign_fma_get_cb as its completion handler;
 * @a unpack_cb and @a arg are stored in the descriptor by
 * uct_ugni_format_get_fma().
 *
 * @param tl_ep        Endpoint to post on.
 * @param unpack_cb    User callback handed to the descriptor formatter.
 * @param arg          Opaque argument for @a unpack_cb.
 * @param length       Number of bytes requested. The length rounded up to
 *                     UGNI_GET_ALIGN is validated against fma_seg_size.
 * @param remote_addr  Remote address to read from.
 * @param rkey         Remote key for @a remote_addr.
 * @param comp         Completion object handed to the descriptor formatter.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         status returned by uct_ugni_post_fma() (called with UCS_INPROGRESS
 *         as the status to report on success).
 */
ucs_status_t uct_ugni_ep_get_bcopy(uct_ep_h tl_ep,
                                   uct_unpack_callback_t unpack_cb,
                                   void *arg, size_t length,
                                   uint64_t remote_addr, uct_rkey_t rkey,
                                   uct_completion_t *comp)
{
    uct_ugni_rdma_fetch_desc_t *get_desc;
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);

    /* Parameter screening happens before any descriptor is taken */
    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0,
                     rdma_iface->config.fma_seg_size, "get_bcopy");

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super,
                             &rdma_iface->free_desc_get_buffer, get_desc,
                             return UCS_ERR_NO_RESOURCE);

    uct_ugni_format_get_fma(get_desc, GNI_POST_FMA_GET, remote_addr, rkey,
                            length, ugni_ep, comp,
                            uct_ugni_unalign_fma_get_cb, unpack_cb, arg);

    ucs_trace_data("Posting GET BCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   get_desc->super.desc.length, length,
                   (void *)get_desc->super.desc.local_addr,
                   (void *)get_desc->super.desc.remote_addr,
                   get_desc->super.desc.remote_mem_hndl.qword1,
                   get_desc->super.desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, BCOPY, length);

    return uct_ugni_post_fma(rdma_iface, ugni_ep, &get_desc->super,
                             UCS_INPROGRESS);
}
/**
 * Post a buffered-copy PUT as a single FMA PUT request.
 *
 * The user supplies a pack callback and no ready-made buffer, so the payload
 * is packed into the memory that immediately follows a descriptor taken from
 * the free_desc_buffer pool, and that memory is used as the local source of
 * the FMA PUT.
 *
 * @param tl_ep        Endpoint to post on.
 * @param pack_cb      Callback that writes the payload and returns its size.
 * @param arg          Opaque argument for @a pack_cb.
 * @param remote_addr  Remote address to write to.
 * @param rkey         Remote key for @a remote_addr.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         value returned by uct_ugni_post_fma(), which is called with the
 *         packed length as the status to report on success.
 */
ssize_t uct_ugni_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                              void *arg, uint64_t remote_addr,
                              uct_rkey_t rkey)
{
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *put_desc;
    void *payload;
    size_t packed_len;

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super,
                             &rdma_iface->free_desc_buffer, put_desc,
                             return UCS_ERR_NO_RESOURCE);

    /* The pack area starts right after the descriptor header */
    payload    = put_desc + 1;
    packed_len = pack_cb(payload, arg);

    /* The descriptor is passed along so the zero-length shortcut can hand it
     * back. NOTE(review): the length check below is not given the descriptor;
     * confirm whether a failed check leaves it outside the pool. */
    UCT_SKIP_ZERO_LENGTH(packed_len, put_desc);
    UCT_CHECK_LENGTH(packed_len, 0, rdma_iface->config.fma_seg_size,
                     "put_bcopy");

    uct_ugni_format_fma(put_desc, GNI_POST_FMA_PUT, payload, remote_addr,
                        rkey, packed_len, ugni_ep, NULL, NULL);

    ucs_trace_data("Posting PUT BCOPY, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   put_desc->desc.length,
                   (void *)put_desc->desc.local_addr,
                   (void *)put_desc->desc.remote_addr,
                   put_desc->desc.remote_mem_hndl.qword1,
                   put_desc->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, BCOPY,
                      packed_len);

    return uct_ugni_post_fma(rdma_iface, ugni_ep, put_desc, packed_len);
}
/**
 * Post a 32-bit atomic add that does not return the previous value.
 *
 * An AMO descriptor from the free_desc_famo pool is formatted with the
 * GNI_FMA_ATOMIC2_IADD_S command, @a add as the first operand, and no result
 * buffer, completion or unpack callback.
 *
 * @param tl_ep        Endpoint to post on.
 * @param add          Value to add to the remote 32-bit word.
 * @param remote_addr  Remote address of the target word.
 * @param rkey         Remote key for @a remote_addr.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         status returned by uct_ugni_post_fma() (called with UCS_OK as the
 *         status to report on success).
 */
ucs_status_t uct_ugni_ep_atomic_add32(uct_ep_h tl_ep, uint32_t add,
                                      uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_ugni_rdma_fetch_desc_t *amo_desc;
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super,
                             &rdma_iface->free_desc_famo, amo_desc,
                             return UCS_ERR_NO_RESOURCE);

    /* Second operand is unused for an add, hence 0 */
    uct_ugni_format_fma_amo(amo_desc, GNI_POST_AMO, GNI_FMA_ATOMIC2_IADD_S,
                            (uint64_t)add, 0, NULL, remote_addr, rkey,
                            LEN_32, ugni_ep, NULL, NULL, NULL);

    ucs_trace_data("Posting AMO ADD, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]",
                   amo_desc->super.desc.length, add,
                   (void *)amo_desc->super.desc.remote_addr,
                   amo_desc->super.desc.remote_mem_hndl.qword1,
                   amo_desc->super.desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));

    return uct_ugni_post_fma(rdma_iface, ugni_ep, &amo_desc->super, UCS_OK);
}
/**
 * Post a 32-bit atomic compare-and-swap that fetches a result.
 *
 * An AMO descriptor from the free_desc_famo pool is formatted with the
 * GNI_FMA_ATOMIC2_FCSWAP_S command. The memory directly after the descriptor
 * is passed as the local buffer, and uct_ugni_amo_unpack32 is registered as
 * the unpack callback with @a result as its argument.
 *
 * @param tl_ep        Endpoint to post on.
 * @param compare      Value passed as the first AMO operand.
 * @param swap         Value passed as the second AMO operand.
 * @param remote_addr  Remote address of the target word.
 * @param rkey         Remote key for @a remote_addr.
 * @param result       User location passed to the unpack callback.
 * @param comp         Completion object handed to the descriptor formatter.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         status returned by uct_ugni_post_fma() (called with UCS_INPROGRESS
 *         as the status to report on success).
 */
ucs_status_t uct_ugni_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare,
                                        uint32_t swap, uint64_t remote_addr,
                                        uct_rkey_t rkey, uint32_t *result,
                                        uct_completion_t *comp)
{
    uct_ugni_rdma_fetch_desc_t *amo_desc;
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super,
                             &rdma_iface->free_desc_famo, amo_desc,
                             return UCS_ERR_NO_RESOURCE);

    /* The fetched value is received in the space trailing the descriptor */
    uct_ugni_format_fma_amo(amo_desc, GNI_POST_AMO, GNI_FMA_ATOMIC2_FCSWAP_S,
                            (uint64_t)compare, (uint64_t)swap, amo_desc + 1,
                            remote_addr, rkey, LEN_32, ugni_ep, comp,
                            uct_ugni_amo_unpack32, (void *)result);

    ucs_trace_data("Posting AMO CSWAP, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx32" compare %"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]",
                   amo_desc->super.desc.length, swap, compare,
                   (void *)amo_desc->super.desc.remote_addr,
                   amo_desc->super.desc.remote_mem_hndl.qword1,
                   amo_desc->super.desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));

    return uct_ugni_post_fma(rdma_iface, ugni_ep, &amo_desc->super,
                             UCS_INPROGRESS);
}
/**
 * Post a 64-bit atomic operation that does not return a value.
 *
 * The GNI command is chosen by the caller through @a op_type. The descriptor
 * comes from the free_desc_famo pool and is formatted with no result buffer,
 * completion or unpack callback.
 *
 * @param tl_ep        Endpoint to post on.
 * @param op           Operand passed as the first AMO operand.
 * @param remote_addr  Remote address of the target word.
 * @param rkey         Remote key for @a remote_addr.
 * @param op_type      GNI AMO command to execute.
 * @param op_str       Operation name, used only in the trace message.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         status returned by uct_ugni_post_fma() (called with UCS_OK as the
 *         status to report on success).
 */
ucs_status_t uct_ugni_ep_atomic_op64(uct_ep_h tl_ep, uint64_t op,
                                     uint64_t remote_addr, uct_rkey_t rkey,
                                     gni_fma_cmd_type_t op_type, char *op_str)
{
    uct_ugni_rdma_fetch_desc_t *amo_desc;
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super,
                             &rdma_iface->free_desc_famo, amo_desc,
                             return UCS_ERR_NO_RESOURCE);

    /* Single-operand operation: the second operand slot is passed as 0 */
    uct_ugni_format_fma_amo(amo_desc, GNI_POST_AMO, op_type, op, 0, NULL,
                            remote_addr, rkey, LEN_64, ugni_ep, NULL, NULL,
                            NULL);

    ucs_trace_data("Posting AMO %s, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx64" to %p, with [%"PRIx64" %"PRIx64"]",
                   op_str, amo_desc->super.desc.length, op,
                   (void *)amo_desc->super.desc.remote_addr,
                   amo_desc->super.desc.remote_mem_hndl.qword1,
                   amo_desc->super.desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));

    return uct_ugni_post_fma(rdma_iface, ugni_ep, &amo_desc->super, UCS_OK);
}
/**
 * Post a short PUT as a single FMA PUT request that reads directly from the
 * caller's buffer.
 *
 * @param tl_ep        Endpoint to post on.
 * @param buffer       Local data to write; used as the FMA local address.
 * @param length       Number of bytes to write, validated against
 *                     fma_seg_size.
 * @param remote_addr  Remote address to write to.
 * @param rkey         Remote key for @a remote_addr.
 *
 * @return UCS_ERR_NO_RESOURCE if no descriptor is available, otherwise the
 *         status returned by uct_ugni_post_fma() (called with UCS_OK as the
 *         status to report on success).
 */
ucs_status_t uct_ugni_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                   unsigned length, uint64_t remote_addr,
                                   uct_rkey_t rkey)
{
    uct_ugni_base_desc_t *put_desc;
    uct_ugni_rdma_iface_t *rdma_iface = ucs_derived_of(tl_ep->iface,
                                                       uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ugni_ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);

    /* Parameter screening happens before any descriptor is taken */
    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(length, 0, rdma_iface->config.fma_seg_size, "put_short");

    UCT_TL_IFACE_GET_TX_DESC(&rdma_iface->super.super, &rdma_iface->free_desc,
                             put_desc, return UCS_ERR_NO_RESOURCE);

    uct_ugni_format_fma(put_desc, GNI_POST_FMA_PUT, buffer, remote_addr, rkey,
                        length, ugni_ep, NULL, NULL);

    ucs_trace_data("Posting PUT Short, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   put_desc->desc.length,
                   (void *)put_desc->desc.local_addr,
                   (void *)put_desc->desc.remote_addr,
                   put_desc->desc.remote_mem_hndl.qword1,
                   put_desc->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length);

    return uct_ugni_post_fma(rdma_iface, ugni_ep, put_desc, UCS_OK);
}
static ucs_status_t uct_ugni_ep_get_composed_fma_rdma(uct_ep_h tl_ep, void *buffer, size_t length, uct_mem_h memh, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { uct_ugni_rdma_fetch_desc_t *fma = NULL; uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); uct_ugni_rdma_fetch_desc_t *rdma = NULL; size_t fma_length, rdma_length, aligned_fma_remote_start; uint64_t fma_remote_start, rdma_remote_start; ucs_status_t post_result; rdma_length = length - iface->config.fma_seg_size; fma_length = iface->config.fma_seg_size; UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer, fma, return UCS_ERR_NO_RESOURCE); UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get, rdma, return UCS_ERR_NO_RESOURCE); rdma_remote_start = remote_addr; fma_remote_start = rdma_remote_start + rdma_length; aligned_fma_remote_start = ucs_align_up_pow2(fma_remote_start, UGNI_GET_ALIGN); uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET, aligned_fma_remote_start, rkey, fma_length, ep, comp, uct_ugni_unalign_fma_composed_cb, NULL, NULL); fma->tail = aligned_fma_remote_start - fma_remote_start; uct_ugni_format_unaligned_rdma(rdma, GNI_POST_RDMA_GET, buffer, rdma_remote_start, memh, rkey, rdma_length+fma->tail, ep, iface->super.local_cq, comp, uct_ugni_unalign_rdma_composed_cb); fma->head = rdma; rdma->head = fma; fma->network_completed_bytes = rdma->network_completed_bytes = 0; fma->user_buffer = rdma->user_buffer = buffer; fma->expected_bytes = rdma->expected_bytes = fma->super.desc.length + rdma->super.desc.length; ucs_trace_data("Posting split GET ZCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to " "%p, with [%"PRIx64" %"PRIx64"] and GNI_PostRdma of size %"PRIx64" (%lu)" " from %p to %p, with [%"PRIx64" %"PRIx64"]", fma->super.desc.length, length, (void *)fma->super.desc.local_addr, (void *)fma->super.desc.remote_addr, fma->super.desc.remote_mem_hndl.qword1, 
fma->super.desc.remote_mem_hndl.qword2, rdma->super.desc.length, length, (void *)rdma->super.desc.local_addr, (void *)rdma->super.desc.remote_addr, rdma->super.desc.remote_mem_hndl.qword1, rdma->super.desc.remote_mem_hndl.qword2); UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length); post_result = uct_ugni_post_fma(iface, ep, &(fma->super), UCS_INPROGRESS); if(post_result != UCS_OK && post_result != UCS_INPROGRESS){ ucs_mpool_put(rdma); return post_result; } return uct_ugni_post_rdma(iface, ep, &(rdma->super)); }