Example #1
0
static ucs_status_t recieve_datagram(uct_ugni_udt_iface_t *iface, uint64_t id, uct_ugni_udt_ep_t **ep_out)
{
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;
    uct_ugni_udt_ep_t *ep;
    gni_ep_handle_t gni_ep;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;

    ucs_trace_func("iface=%p, id=%lx", iface, id);

    if (UCT_UGNI_UDT_ANY == id) {
        ep = NULL;
        gni_ep = iface->ep_any;
        desc = iface->desc_any;
    } else {
        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                            uct_ugni_udt_ep_t);
        gni_ep = ep->super.ep;
        desc = ep->posted_desc;
    }

    *ep_out = ep;
    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_EpPostDataWaitById(gni_ep, id, -1, &post_state, &rem_addr, &rem_id);
    uct_ugni_device_unlock(&iface->super.cdm);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, id=%lu Error status: %s %d",
                  id, gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_IO_ERROR;
    }
    if (GNI_POST_TERMINATED == post_state) {
        return UCS_ERR_CANCELED;
    }

    if (GNI_POST_COMPLETED != post_state) {
        ucs_error("GNI_EpPostDataWaitById gave unexpected response: %u", post_state);
        return UCS_ERR_IO_ERROR;
    }

    if (UCT_UGNI_UDT_ANY != id) {
        --iface->super.outstanding;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);

    ucs_trace("Got datagram id: %lu type: %i len: %i am_id: %i", id, header->type, header->length, header->am_id);

    if (UCT_UGNI_UDT_PAYLOAD != header->type) {
        /* ack message, no data */
        ucs_assert_always(NULL != ep);
        ucs_mpool_put(ep->posted_desc);
        uct_ugni_check_flush(ep->desc_flush_group);
        ep->posted_desc = NULL;
        return UCS_OK;
    }

    return UCS_INPROGRESS;
}
Example #2
0
static void uct_ugni_udt_clean_wildcard(uct_ugni_udt_iface_t *iface)
{
    gni_return_t ugni_rc;
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;
    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_EpPostDataCancelById(iface->ep_any, UCT_UGNI_UDT_ANY);
    if (GNI_RC_SUCCESS != ugni_rc) {
        uct_ugni_device_unlock(&iface->super.cdm);
        ucs_error("GNI_EpPostDataCancel failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return;
    }
    ugni_rc = GNI_EpPostDataTestById(iface->ep_any, UCT_UGNI_UDT_ANY, &post_state, &rem_addr, &rem_id);
    if (GNI_RC_SUCCESS != ugni_rc) {
        if (GNI_RC_NO_MATCH != ugni_rc) {
            uct_ugni_device_unlock(&iface->super.cdm);
            ucs_error("GNI_EpPostDataTestById failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
            return;
        }
    } else {
        if (GNI_POST_PENDING == post_state) {
            ugni_rc = GNI_EpPostDataWaitById(iface->ep_any, UCT_UGNI_UDT_ANY, -1, &post_state, &rem_addr, &rem_id);
        }
    }
    ugni_rc = GNI_EpDestroy(iface->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpDestroy failed, Error status: %s %d\n",
                  gni_err_str[ugni_rc], ugni_rc);
    }
    uct_ugni_device_unlock(&iface->super.cdm);
}
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint32_t remote_addr, remote_id;
    mca_btl_base_endpoint_t *ep;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    uint64_t datagram_id;
    gni_return_t grc;
    int count = 0;

    /* check for datagram completion */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        return 0;
    }

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
        MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
        handle = ugni_module->wildcard_ep;
    } else {
        handle =
            ugni_module->endpoints[(uint32_t)(datagram_id & 0xffffffffull)]->smsg_ep_handle;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "peer = %d", datagram_id, post_state, remote_id));

    ep = ugni_module->endpoints[remote_id];

    /* NTH: TODO -- error handling */
    (void) mca_btl_ugni_ep_connect_progress (ep);

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /*  process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
        MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}
Example #4
0
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint64_t datagram_id, data, proc_id;
    uint32_t remote_addr, remote_id;
    mca_btl_base_endpoint_t *ep;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    gni_return_t grc;
    int count = 0, rc;

    /* check for datagram completion */
    OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);  /* TODO: may not need lock for this function */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
        return 0;
    }

    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
        handle = ep->smsg_ep_handle;
    } else {
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return opal_common_rc_ugni_to_opal (grc);
    }

    /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
    if (handle == ugni_module->wildcard_ep) {
        proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);

        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
                     proc_id));

        OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
        OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);

        /* check if the endpoint is known */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
            struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
            BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
                         ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
            ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
            if (OPAL_UNLIKELY(NULL == ep)) {
                return rc;
            }
        }
    } else {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
                 data, (void *) ep, remote_id));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&ep->lock);
    if (handle != ugni_module->wildcard_ep) {
        /* directed post complete */
        ep->dg_posted = false;
    }

    (void) mca_btl_ugni_ep_connect_progress (ep);
    opal_mutex_unlock (&ep->lock);

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /*  process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}
Example #5
0
static void uct_ugni_udt_progress(void *arg)
{
    uint32_t rem_addr,
             rem_id;
    uint64_t id;
    void *payload;
    void *user_desc;

    ucs_status_t status;

    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;
    uct_ugni_udt_iface_t * iface = (uct_ugni_udt_iface_t *)arg;
    uct_ugni_udt_ep_t *ep;

    gni_ep_handle_t ugni_ep;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;

    pthread_mutex_lock(&uct_ugni_global_lock);
    ugni_rc = GNI_PostDataProbeById(iface->super.nic_handle, &id);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        if (GNI_RC_NO_MATCH != ugni_rc) {
            ucs_error("GNI_PostDataProbeById , Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
        }
        goto exit;
    }

    if (UCT_UGNI_UDT_ANY == id) {
        /* New incomming message */
        ep = NULL;
        ugni_ep = iface->ep_any;
        desc = iface->desc_any;
    } else {
        /* Ack message */
        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                            uct_ugni_udt_ep_t);
        if (ucs_unlikely(NULL == ep)) {
            ucs_error("Can not lookup ep with id %"PRIx64,id);
            goto exit;
        }
        ugni_ep = ep->super.ep;
        desc = ep->posted_desc;
    }

    ugni_rc = GNI_EpPostDataWaitById(ugni_ep, id, -1, &post_state, &rem_addr, &rem_id);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        goto exit;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);
    payload = uct_ugni_udt_get_rpayload(desc, iface);
    user_desc = uct_ugni_udt_get_user_desc(desc, iface);

    if (UCT_UGNI_UDT_ANY == id) {
        /* New incomming message */
        ucs_assert_always(header->type == UCT_UGNI_UDT_PAYLOAD);
        uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV,
                           header->am_id, payload, header->length, "RX: AM");
        status = uct_iface_invoke_am(&iface->super.super, header->am_id, payload,
                                     header->length, user_desc);
        if (UCS_OK != status) {
            uct_ugni_udt_desc_t *new_desc;
            /* set iface for a later release call */
            uct_recv_desc_iface(user_desc) = &iface->super.super.super;
            /* Allocate a new element */
            UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                                     new_desc, goto exit);
            /* set the new desc */
            iface->desc_any = new_desc;
        }
Example #6
0
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint32_t remote_addr, remote_id;
    uint64_t datagram_id, data;
    mca_btl_base_endpoint_t *ep;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    gni_return_t grc;
    int count = 0, rc;

    /* check for datagram completion */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        return 0;
    }

    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
        handle = ep->smsg_ep_handle;
    } else {
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
    if (handle == ugni_module->wildcard_ep) {
        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64, ugni_module->wc_remote_attr.proc_id));
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
                                               ugni_module->wc_remote_attr.proc_id,
                                               (void *) &ep);
        /* check if the endpoint is known */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
            BTL_ERROR(("received connection attempt from an unknown peer. rc: %d, ep: %p, id: 0x%" PRIx64,
                       rc, ep, ugni_module->wc_remote_attr.proc_id));
            return OMPI_ERR_NOT_FOUND;
        }
    } else {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", ep));
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
                 data, ep, remote_id));

    /* NTH: TODO -- error handling */
    (void) mca_btl_ugni_ep_connect_progress (ep);

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /*  process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}