Esempio n. 1
0
static UCS_CLASS_CLEANUP_FUNC(uct_dc_ep_t)
{
    uct_dc_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_dc_iface_t);

    uct_dc_ep_pending_purge(&self->super.super, NULL, NULL);
    ucs_arbiter_group_cleanup(&self->arb_group);
    uct_rc_fc_cleanup(&self->fc);

    ucs_assert_always(self->state != UCT_DC_EP_INVALID);

    if (self->dci == UCT_DC_EP_NO_DCI) {
        return;
    }

    /* TODO: this is good for dcs policy only.
     * Need to change if eps share dci
     */
    ucs_assertv_always(uct_dc_iface_dci_has_outstanding(iface, self->dci),
                       "iface (%p) ep (%p) dci leak detected: dci=%d", iface,
                       self, self->dci);

    /* we can handle it but well behaving app should not do this */
    ucs_warn("ep (%p) is destroyed with %d outstanding ops",
             self, (int16_t)iface->super.config.tx_qp_len -
             uct_rc_txqp_available(&iface->tx.dcis[self->dci].txqp));
    uct_rc_txqp_purge_outstanding(&iface->tx.dcis[self->dci].txqp, UCS_ERR_CANCELED, 1);
    iface->tx.dcis[self->dci].ep = NULL;
}
Esempio n. 2
0
ucs_status_t uct_dc_mlx5_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp)
{
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t    *ep    = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    ucs_status_t        status;
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    if (ucs_unlikely(flags & UCT_FLUSH_FLAG_CANCEL)) {
        if (ep->dci != UCT_DC_MLX5_EP_NO_DCI) {
            uct_rc_txqp_purge_outstanding(&iface->tx.dcis[ep->dci].txqp,
                                          UCS_ERR_CANCELED, 0);
#if ENABLE_ASSERT
            iface->tx.dcis[ep->dci].flags |= UCT_DC_DCI_FLAG_EP_CANCELED;
#endif
        }

        uct_ep_pending_purge(tl_ep, NULL, 0);
        return UCS_OK;
    }

    if (!uct_rc_iface_has_tx_resources(&iface->super.super)) {
        return UCS_ERR_NO_RESOURCE;
    }

    if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) {
        if (!uct_dc_mlx5_iface_dci_can_alloc(iface)) {
            return UCS_ERR_NO_RESOURCE; /* waiting for dci */
        } else {
            UCT_TL_EP_STAT_FLUSH(&ep->super); /* no sends */
            return UCS_OK;
        }
    }

    if (!uct_dc_mlx5_iface_dci_ep_can_send(ep)) {
        return UCS_ERR_NO_RESOURCE; /* cannot send */
    }

    status = uct_dc_mlx5_iface_flush_dci(iface, ep->dci);
    if (status == UCS_OK) {
        UCT_TL_EP_STAT_FLUSH(&ep->super);
        return UCS_OK; /* all sends completed */
    }

    ucs_assert(status == UCS_INPROGRESS);
    ucs_assert(ep->dci != UCT_DC_MLX5_EP_NO_DCI);

    UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq);

    return uct_rc_txqp_add_flush_comp(&iface->super.super, &ep->super, txqp,
                                      comp, txwq->sig_pi);
}
Esempio n. 3
0
ucs_status_t uct_rc_verbs_ep_handle_failure(uct_rc_verbs_ep_t *ep,
                                            ucs_status_t status)
{
    uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                           uct_rc_iface_t);

    iface->tx.cq_available += ep->txcnt.pi - ep->txcnt.ci;
    /* Reset CI to prevent cq_available overrun on ep_destoroy */
    ep->txcnt.ci = ep->txcnt.pi;
    uct_rc_txqp_purge_outstanding(&ep->super.txqp, status, 0);

    return iface->super.ops->set_ep_failed(&iface->super, &ep->super.super.super,
                                           status);
}
Esempio n. 4
0
void uct_dc_ep_set_failed(ucs_class_t *ep_cls, uct_dc_iface_t *iface,
                          uint32_t qp_num)
{
    uint8_t dci = uct_dc_iface_dci_find(iface, qp_num);
    uct_dc_ep_t *ep = iface->tx.dcis[dci].ep;

    if (!ep) {
        return;
    }

    uct_rc_txqp_purge_outstanding(&iface->tx.dcis[dci].txqp,
                                  UCS_ERR_ENDPOINT_TIMEOUT, 0);
    uct_set_ep_failed(ep_cls, &ep->super.super,
                      &iface->super.super.super.super);
    if (UCS_OK != uct_dc_iface_dci_reconnect(iface, &iface->tx.dcis[dci].txqp)) {
        ucs_fatal("Unsuccessful reconnect of DC QP #%u", qp_num);
    }
    uct_rc_txqp_available_set(&iface->tx.dcis[dci].txqp,
                              iface->super.config.tx_qp_len);
}