コード例 #1
0
ファイル: ugni_rdma_ep.c プロジェクト: alex-mikheev/ucx
ucs_status_t uct_ugni_ep_get_bcopy(uct_ep_h tl_ep,
                                   uct_unpack_callback_t unpack_cb,
                                   void *arg, size_t length,
                                   uint64_t remote_addr, uct_rkey_t rkey,
                                   uct_completion_t *comp)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0,
                     iface->config.fma_seg_size, "get_bcopy");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer,
                             fma, return UCS_ERR_NO_RESOURCE);

    uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET,
                            remote_addr, rkey, length,
                            ep, comp,
                            uct_ugni_unalign_fma_get_cb,
                            unpack_cb, arg);

    ucs_trace_data("Posting GET BCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, length,
                   (void *)fma->super.desc.local_addr,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, BCOPY, length);
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS);
}
コード例 #2
0
ファイル: ugni_rdma_ep.c プロジェクト: alex-mikheev/ucx
ssize_t uct_ugni_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                              void *arg, uint64_t remote_addr, uct_rkey_t rkey)
{
    /* Since custom pack function is used
     * we have to allocate separate memory to pack
     * the info and pass it to FMA
     * something like:
     * pack_cb(desc + 1, arg, length); */
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_base_desc_t *fma;
    size_t length;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_buffer,
                             fma, return UCS_ERR_NO_RESOURCE);

    length = pack_cb(fma + 1, arg);
    UCT_SKIP_ZERO_LENGTH(length, fma);
    UCT_CHECK_LENGTH(length, 0, iface->config.fma_seg_size, "put_bcopy");
    uct_ugni_format_fma(fma, GNI_POST_FMA_PUT, fma + 1,
                        remote_addr, rkey, length, ep, NULL, NULL);
    ucs_trace_data("Posting PUT BCOPY, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   fma->desc.length,
                   (void *)fma->desc.local_addr,
                   (void *)fma->desc.remote_addr,
                   fma->desc.remote_mem_hndl.qword1,
                   fma->desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, BCOPY, length);
    return uct_ugni_post_fma(iface, ep, fma, length);
}
コード例 #3
0
ファイル: ugni_rdma_ep.c プロジェクト: alex-mikheev/ucx
ucs_status_t uct_ugni_ep_atomic_add32(uct_ep_h tl_ep, uint32_t add,
                                      uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma,
                             return UCS_ERR_NO_RESOURCE);
    uct_ugni_format_fma_amo(fma, GNI_POST_AMO, GNI_FMA_ATOMIC2_IADD_S,
                            (uint64_t)add, 0, NULL, remote_addr,
                            rkey, LEN_32, ep, NULL, NULL, NULL);
    ucs_trace_data("Posting AMO ADD, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, add,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_OK);
}
コード例 #4
0
ファイル: ugni_rdma_ep.c プロジェクト: alex-mikheev/ucx
ucs_status_t uct_ugni_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap,
                                        uint64_t remote_addr, uct_rkey_t rkey,
                                        uint32_t *result, uct_completion_t *comp)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma,
                             return UCS_ERR_NO_RESOURCE);
    uct_ugni_format_fma_amo(fma, GNI_POST_AMO, GNI_FMA_ATOMIC2_FCSWAP_S,
                            (uint64_t)compare, (uint64_t)swap, fma + 1, remote_addr,
                            rkey, LEN_32, ep, comp, uct_ugni_amo_unpack32, (void *)result);
    ucs_trace_data("Posting AMO CSWAP, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx32" compare %"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, swap, compare,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS);
}
コード例 #5
0
ファイル: ugni_rdma_ep.c プロジェクト: openucx/ucx
ucs_status_t uct_ugni_ep_atomic_op64(uct_ep_h tl_ep, uint64_t op,
                                     uint64_t remote_addr, uct_rkey_t rkey,
                                     gni_fma_cmd_type_t op_type, char *op_str)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma,
                             return UCS_ERR_NO_RESOURCE);
    uct_ugni_format_fma_amo(fma, GNI_POST_AMO, op_type,
                            op, 0, NULL, remote_addr,
                            rkey, LEN_64, ep, NULL, NULL, NULL);
    ucs_trace_data("Posting AMO %s, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx64" to %p, with [%"PRIx64" %"PRIx64"]",
                   op_str, fma->super.desc.length, op,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_OK);
}
コード例 #6
0
ファイル: ugni_rdma_ep.c プロジェクト: alex-mikheev/ucx
ucs_status_t uct_ugni_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                   unsigned length, uint64_t remote_addr,
                                   uct_rkey_t rkey)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_base_desc_t *fma;

    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(length, 0, iface->config.fma_seg_size, "put_short");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                             fma, return UCS_ERR_NO_RESOURCE);
    uct_ugni_format_fma(fma, GNI_POST_FMA_PUT, buffer,
                        remote_addr, rkey, length, ep, NULL, NULL);
    ucs_trace_data("Posting PUT Short, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   fma->desc.length,
                   (void *)fma->desc.local_addr,
                   (void *)fma->desc.remote_addr,
                   fma->desc.remote_mem_hndl.qword1,
                   fma->desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length);
    return uct_ugni_post_fma(iface, ep, fma, UCS_OK);
}
コード例 #7
0
ファイル: ugni_rdma_ep.c プロジェクト: alex-mikheev/ucx
static ucs_status_t uct_ugni_ep_get_composed_fma_rdma(uct_ep_h tl_ep, void *buffer, size_t length,
                                                      uct_mem_h memh, uint64_t remote_addr,
                                                      uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_fetch_desc_t *fma = NULL;
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *rdma = NULL;

    size_t fma_length, rdma_length, aligned_fma_remote_start;

    uint64_t fma_remote_start, rdma_remote_start;
    ucs_status_t post_result;


    rdma_length = length - iface->config.fma_seg_size;
    fma_length = iface->config.fma_seg_size;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer,
                             fma, return UCS_ERR_NO_RESOURCE);

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get,
                             rdma, return UCS_ERR_NO_RESOURCE);

    rdma_remote_start = remote_addr;
    fma_remote_start = rdma_remote_start + rdma_length;
    aligned_fma_remote_start = ucs_align_up_pow2(fma_remote_start, UGNI_GET_ALIGN);

    uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET, aligned_fma_remote_start,
                            rkey, fma_length, ep, comp,
                            uct_ugni_unalign_fma_composed_cb, NULL, NULL);

    fma->tail = aligned_fma_remote_start - fma_remote_start;

    uct_ugni_format_unaligned_rdma(rdma, GNI_POST_RDMA_GET, buffer, rdma_remote_start, memh, rkey,
                                   rdma_length+fma->tail, ep, iface->super.local_cq, comp,
                                   uct_ugni_unalign_rdma_composed_cb);

    fma->head = rdma;
    rdma->head = fma;
    fma->network_completed_bytes = rdma->network_completed_bytes = 0;
    fma->user_buffer = rdma->user_buffer = buffer;
    fma->expected_bytes = rdma->expected_bytes = fma->super.desc.length + rdma->super.desc.length;

    ucs_trace_data("Posting split GET ZCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"] and GNI_PostRdma of size %"PRIx64" (%lu)"
                   " from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, length,
                   (void *)fma->super.desc.local_addr,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2,
                   rdma->super.desc.length, length,
                   (void *)rdma->super.desc.local_addr,
                   (void *)rdma->super.desc.remote_addr,
                   rdma->super.desc.remote_mem_hndl.qword1,
                   rdma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length);
    post_result = uct_ugni_post_fma(iface, ep, &(fma->super), UCS_INPROGRESS);
    if(post_result != UCS_OK && post_result != UCS_INPROGRESS){
        ucs_mpool_put(rdma);
        return post_result;
    }
    return uct_ugni_post_rdma(iface, ep, &(rdma->super));
}