Example #1
0
static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_iface_atomic_post(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
                              unsigned opcode, uct_rc_iface_send_desc_t *desc, unsigned length,
                              uint64_t remote_addr, uct_rkey_t rkey,
                              uint64_t compare_mask, uint64_t compare,
                              uint64_t swap_mask, uint64_t swap_add)
{
    uint32_t ib_rkey = uct_ib_resolve_atomic_rkey(rkey, ep->atomic_mr_offset,
                                                  &remote_addr);

    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);
    UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq);

    desc->super.sn = txwq->sw_pi;
    uct_rc_mlx5_txqp_dptr_post(&iface->super, UCT_IB_QPT_DCI, txqp, txwq,
                               opcode, desc + 1, length, &desc->lkey,
                               remote_addr, ib_rkey,
                               compare_mask, compare, swap_mask, swap_add,
                               &ep->av, uct_dc_mlx5_ep_get_grh(ep),
                               uct_ib_mlx5_wqe_av_size(&ep->av),
                               MLX5_WQE_CTRL_CQ_UPDATE, 0, INT_MAX, NULL);

    UCT_TL_EP_STAT_ATOMIC(&ep->super);
    uct_rc_txqp_add_send_op(txqp, &desc->super);
}
Example #2
0
ucs_status_t uct_sm_ep_atomic_add64(uct_ep_h tl_ep, uint64_t add,
                                    uint64_t remote_addr, uct_rkey_t rkey)
{
    uint64_t *ptr = (uint64_t *)(rkey + remote_addr);
    ucs_atomic_add64(ptr, add);
    uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_ADD64 [add %"PRIu64"]", add);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return UCS_OK;
}
Example #3
0
ucs_status_t uct_sm_ep_atomic_swap32(uct_ep_h tl_ep, uint32_t swap,
                                     uint64_t remote_addr, uct_rkey_t rkey,
                                     uint32_t *result, uct_completion_t *comp)
{
    uint32_t *ptr = (uint32_t *)(rkey + remote_addr);
    *result = ucs_atomic_swap32(ptr, swap);
    uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_SWAP32 [swap %"PRIu32
    		             " result %"PRIu32"]", swap, *result);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return UCS_OK;
}
Example #4
0
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_atomic_post(uct_rc_verbs_ep_t *ep, int opcode, uint64_t compare_add,
                            uint64_t swap, uint64_t remote_addr, uct_rkey_t rkey,
                            uct_rc_iface_send_desc_t *desc, int force_sig)
{
    struct ibv_send_wr wr;
    struct ibv_sge sge;

    UCT_RC_VERBS_FILL_ATOMIC_WR(wr, wr.opcode, sge, opcode,
                                compare_add, swap, remote_addr, rkey, ep->super.umr_offset);
    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, force_sig);
}
Example #5
0
ucs_status_t uct_sm_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare,
                                      uint64_t swap, uint64_t remote_addr,
                                      uct_rkey_t rkey, uint64_t *result,
                                      uct_completion_t *comp)
{
    uint64_t *ptr = (uint64_t *)(rkey + remote_addr);
    *result = ucs_atomic_cswap64(ptr, compare, swap);
    uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_CSWAP64 [compare %"PRIu64
    		             " swap %"PRIu64" result %"PRIu64"]", compare, swap,
    		             *result);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return UCS_OK;
}
Example #6
0
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_ep_atomic_post(uct_rc_mlx5_ep_t *ep, unsigned opcode,
                           uct_rc_iface_send_desc_t *desc, unsigned length,
                           uint64_t remote_addr, uct_rkey_t rkey,
                           uint64_t compare_mask, uint64_t compare,
                           uint64_t swap_add, int signal)
{
    desc->super.sn = ep->tx.wq.sw_pi;
    uct_rc_mlx5_ep_dptr_post(ep, opcode, desc + 1, length, &desc->lkey,
                                      0, NULL, 0, remote_addr, rkey, compare_mask,
                                      compare, swap_add, signal);

    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    ucs_queue_push(&ep->super.outstanding, &desc->super.queue);
}
Example #7
0
static inline void
uct_rc_verbs_ep_ext_atomic_post(uct_rc_verbs_ep_t *ep, int opcode, uint32_t length,
                               uint64_t compare_mask, uint64_t compare_add,
                               uint64_t swap, uint64_t remote_addr, uct_rkey_t rkey,
                               uct_rc_iface_send_desc_t *desc, uint64_t force_sig)
{
    struct ibv_exp_send_wr wr;
    struct ibv_sge sge;

    uct_rc_verbs_fill_ext_atomic_wr(&wr, &sge, opcode, length, compare_mask,
                                    compare_add, swap, remote_addr, rkey, ep->super.atomic_mr_offset);
    UCT_RC_VERBS_FILL_DESC_WR(&wr, desc);
    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    uct_rc_verbs_exp_post_send(ep, &wr, force_sig|IBV_EXP_SEND_EXT_ATOMIC_INLINE);
    uct_rc_txqp_add_send_op_sn(&ep->super.txqp, &desc->super, ep->txcnt.pi);
}
Example #8
0
static inline ucs_status_t
uct_rc_verbs_ext_atomic_post(uct_rc_verbs_ep_t *ep, int opcode, uint32_t length,
                             uint64_t compare_mask, uint64_t compare_add,
                             uint64_t swap, uint64_t remote_addr, uct_rkey_t rkey,
                             uct_rc_iface_send_desc_t *desc, int force_sig,
                             ucs_status_t success)
{
    struct ibv_exp_send_wr wr;
    struct ibv_sge sge;

    sge.addr          = (uintptr_t)(desc + 1);
    sge.lkey          = desc->lkey;
    sge.length        = length;
    wr.next           = NULL;
    wr.sg_list        = &sge;
    wr.num_sge        = 1;
    wr.exp_opcode     = opcode;
    wr.exp_send_flags = IBV_EXP_SEND_EXT_ATOMIC_INLINE;
    wr.comp_mask      = 0;

    wr.ext_op.masked_atomics.log_arg_sz  = ucs_ilog2(length);
    wr.ext_op.masked_atomics.remote_addr = remote_addr;
    wr.ext_op.masked_atomics.rkey        = rkey;

    switch (opcode) {
    case IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP:
        wr.ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.compare_mask = compare_mask;
        wr.ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.compare_val  = compare_add;
        wr.ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.swap_mask    = (uint64_t)(-1);
        wr.ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.swap_val     = swap;
        break;
    case IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD:
        wr.ext_op.masked_atomics.wr_data.inline_data.op.fetch_add.add_val        = compare_add;
        wr.ext_op.masked_atomics.wr_data.inline_data.op.fetch_add.field_boundary = 0;
        break;
    }

    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    uct_rc_verbs_exp_post_send(ep, &wr, force_sig);
    uct_rc_verbs_ep_push_desc(ep, desc);
    return success;
}
Example #9
0
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_atomic_post(uct_rc_verbs_ep_t *ep, int opcode, uint64_t compare_add,
                            uint64_t swap, uint64_t remote_addr, uct_rkey_t rkey,
                            uct_rc_iface_send_desc_t *desc, int force_sig)
{
    struct ibv_send_wr wr;
    struct ibv_sge sge;

    wr.sg_list               = &sge;
    wr.num_sge               = 1;
    wr.opcode                = opcode;
    wr.wr.atomic.compare_add = compare_add;
    wr.wr.atomic.swap        = swap;
    wr.wr.atomic.remote_addr = remote_addr;
    wr.wr.atomic.rkey        = rkey;
    sge.length               = sizeof(uint64_t);

    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, force_sig);
}
Example #10
0
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_atomic_post(uct_rc_mlx5_ep_t *ep, unsigned opcode,
                           uct_rc_iface_send_desc_t *desc, unsigned length,
                           uint64_t remote_addr, uct_rkey_t rkey,
                           uint64_t compare_mask, uint64_t compare,
                           uint64_t swap_add, int signal, ucs_status_t success)
{
    ucs_status_t status;

    desc->super.sn = ep->tx.sw_pi;
    status = uct_rc_mlx5_ep_dptr_post(ep, opcode, desc + 1, length, &desc->lkey,
                                      0, NULL, 0, remote_addr, rkey, compare_mask,
                                      compare, swap_add, signal);
    if (status != UCS_OK) {
        return status;
    }

    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    ucs_queue_push(&ep->super.outstanding, &desc->super.queue);
    return success;
}
Example #11
0
ucs_status_t uct_ugni_ep_atomic_add32(uct_ep_h tl_ep, uint32_t add,
                                      uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma,
                             return UCS_ERR_NO_RESOURCE);
    uct_ugni_format_fma_amo(fma, GNI_POST_AMO, GNI_FMA_ATOMIC2_IADD_S,
                            (uint64_t)add, 0, NULL, remote_addr,
                            rkey, LEN_32, ep, NULL, NULL, NULL);
    ucs_trace_data("Posting AMO ADD, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, add,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_OK);
}
Example #12
0
ucs_status_t uct_ugni_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap,
                                        uint64_t remote_addr, uct_rkey_t rkey,
                                        uint32_t *result, uct_completion_t *comp)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma,
                             return UCS_ERR_NO_RESOURCE);
    uct_ugni_format_fma_amo(fma, GNI_POST_AMO, GNI_FMA_ATOMIC2_FCSWAP_S,
                            (uint64_t)compare, (uint64_t)swap, fma + 1, remote_addr,
                            rkey, LEN_32, ep, comp, uct_ugni_amo_unpack32, (void *)result);
    ucs_trace_data("Posting AMO CSWAP, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx32" compare %"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, swap, compare,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS);
}
Example #13
0
ucs_status_t uct_ugni_ep_atomic_op64(uct_ep_h tl_ep, uint64_t op,
                                     uint64_t remote_addr, uct_rkey_t rkey,
                                     gni_fma_cmd_type_t op_type, char *op_str)
{
    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma,
                             return UCS_ERR_NO_RESOURCE);
    uct_ugni_format_fma_amo(fma, GNI_POST_AMO, op_type,
                            op, 0, NULL, remote_addr,
                            rkey, LEN_64, ep, NULL, NULL, NULL);
    ucs_trace_data("Posting AMO %s, GNI_PostFma of size %"PRIx64" value"
                   "%"PRIx64" to %p, with [%"PRIx64" %"PRIx64"]",
                   op_str, fma->super.desc.length, op,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t));
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_OK);
}
Example #14
0
static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_iface_atomic_post(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
                              unsigned opcode, uct_rc_iface_send_desc_t *desc, unsigned length,
                              uint64_t remote_addr, uct_rkey_t rkey,
                              uint64_t compare_mask, uint64_t compare, uint64_t swap_add)
{
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq);

    desc->super.sn = txwq->sw_pi;
    uct_rc_mlx5_txqp_dptr_post(&iface->super.super, IBV_EXP_QPT_DC_INI, txqp, txwq,
                               opcode, desc + 1, length, &desc->lkey,
                               0, NULL, 0,
                               remote_addr + ep->super.umr_offset,
                               uct_ib_md_umr_rkey(rkey),
                               compare_mask, compare, swap_add,
                               &ep->av, uct_ib_mlx5_wqe_av_size(&ep->av),
                               MLX5_WQE_CTRL_CQ_UPDATE);

    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    uct_rc_txqp_add_send_op(txqp, &desc->super);
}
Example #15
0
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_ep_atomic_post(uct_rc_mlx5_ep_t *ep, unsigned opcode,
                           uct_rc_iface_send_desc_t *desc, unsigned length,
                           uint64_t remote_addr, uct_rkey_t rkey,
                           uint64_t compare_mask, uint64_t compare,
                           uint64_t swap_add, int signal)
{
    uct_rc_iface_t *iface  = ucs_derived_of(ep->super.super.super.iface,
                                            uct_rc_iface_t);
    uint32_t ib_rkey = uct_ib_resolve_atomic_rkey(rkey, ep->super.atomic_mr_offset,
                                                  &remote_addr);

    desc->super.sn = ep->tx.wq.sw_pi;
    uct_rc_mlx5_txqp_dptr_post(iface, IBV_QPT_RC,
                               &ep->super.txqp, &ep->tx.wq,
                               opcode, desc + 1, length, &desc->lkey,
                               0, NULL, 0, remote_addr, ib_rkey,
                               compare_mask, compare, swap_add,
                               NULL, 0, signal);

    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    uct_rc_txqp_add_send_op(&ep->super.txqp, &desc->super);
}