Example #1
0
/**
 * Poll the interface's local completion queue once and retire the send
 * descriptor referenced by the returned event.
 *
 * @param iface  SMSG interface; its super.local_cq is polled and its
 *               smsg_list is searched for the completed descriptor.
 *
 * @return UCS_OK              if GNI_CqGetEvent() returned GNI_RC_NOT_DONE.
 *         UCS_INPROGRESS      if one event was consumed and its descriptor
 *                             was removed from smsg_list and returned to
 *                             the memory pool.
 *         UCS_ERR_NO_RESOURCE if the event failed the error/overrun test, or
 *                             no descriptor matches the event's message id.
 */
static ucs_status_t progress_local_cq(uct_ugni_smsg_iface_t *iface){
    gni_return_t ugni_rc;
    /* Start from zero so the error test below never reads an indeterminate
     * value when GNI_CqGetEvent() fails without storing an event. */
    gni_cq_entry_t event_data = 0;
    uct_ugni_smsg_desc_t message_data;
    uct_ugni_smsg_desc_t *message_pointer;

    ugni_rc = GNI_CqGetEvent(iface->super.local_cq, &event_data);

    if(GNI_RC_NOT_DONE == ugni_rc){
        return UCS_OK;
    }

    if((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)){
        /* TODO: handle overruns */
        ucs_error("Error posting data. CQ overrun = %d", (int)GNI_CQ_OVERRUN(event_data));
        return UCS_ERR_NO_RESOURCE;
    }

    /* Only msg_id is filled in: message_data is a lookup key for the hash. */
    message_data.msg_id = GNI_CQ_GET_MSG_ID(event_data);
    message_pointer = sglib_hashed_uct_ugni_smsg_desc_t_find_member(iface->smsg_list,&message_data);
    ucs_assert(NULL != message_pointer);
    if(NULL == message_pointer){
        /* Runtime guard in case assertions are disabled in this build:
         * there is no descriptor to retire, so do not touch any counters. */
        ucs_error("Completion event does not match any outstanding SMSG descriptor");
        return UCS_ERR_NO_RESOURCE;
    }

    message_pointer->ep->outstanding--;
    iface->super.outstanding--;
    uct_ugni_ep_check_flush(message_pointer->ep);
    sglib_hashed_uct_ugni_smsg_desc_t_delete(iface->smsg_list,message_pointer);
    ucs_mpool_put(message_pointer);
    return UCS_INPROGRESS;
}
Example #2
0
/**
 * Poll the interface's local completion queue once and retire the send
 * descriptor referenced by the returned event.
 *
 * Returns immediately when the interface has no outstanding operations,
 * when GNI_CqGetEvent() returns GNI_RC_NOT_DONE, when the event fails the
 * error/overrun test, or when no descriptor matches the event's message id.
 *
 * @param iface  SMSG interface; its super.local_cq is polled and its
 *               smsg_list is searched for the completed descriptor.
 */
static void progress_local_cq(uct_ugni_smsg_iface_t *iface){
    gni_return_t ugni_rc;
    /* Start from zero so the error test below never reads an indeterminate
     * value when GNI_CqGetEvent() fails without storing an event. */
    gni_cq_entry_t event_data = 0;
    uct_ugni_smsg_desc_t message_data;
    uct_ugni_smsg_desc_t *message_pointer;

    /* Nothing in flight: skip the CQ poll entirely. */
    if(0 == iface->super.outstanding){
        return;
    }

    ugni_rc = GNI_CqGetEvent(iface->super.local_cq, &event_data);
    if(GNI_RC_NOT_DONE == ugni_rc){
        return;
    }

    if((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)){
        /* TODO: handle overruns */
        ucs_error("Error posting data. CQ overrun = %d", (int)GNI_CQ_OVERRUN(event_data));
        return;
    }

    /* Only msg_id is filled in: message_data is a lookup key for the hash. */
    message_data.msg_id = GNI_CQ_GET_MSG_ID(event_data);
    message_pointer = sglib_hashed_uct_ugni_smsg_desc_t_find_member(iface->smsg_list,&message_data);
    if(NULL == message_pointer){
        /* No descriptor to retire: leave the counters and the list alone
         * instead of dereferencing a null pointer. */
        ucs_error("Completion event does not match any outstanding SMSG descriptor");
        return;
    }

    message_pointer->ep->outstanding--;
    iface->super.outstanding--;
    sglib_hashed_uct_ugni_smsg_desc_t_delete(iface->smsg_list,message_pointer);
    ucs_mpool_put(message_pointer);
}
Example #3
0
/**
 * Poll the remote SMSG completion queue once and dispatch the event.
 *
 * @param btl  uGNI BTL module; its smsg_remote_cq is polled and its
 *             endpoints table is indexed by the low 32 bits of the event's
 *             instance id.
 *
 * @return 0 when GNI_CqGetEvent() returns GNI_RC_NOT_DONE or when the event
 *         refers to an endpoint that is not in the connected state.
 *         Otherwise the result of mca_btl_ugni_handle_remote_smsg_overrun()
 *         (overrun cases), of mca_btl_ugni_smsg_process() (normal case), or
 *         of ompi_common_rc_ugni_to_ompi() after the assert (unhandled error).
 */
int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
{
    gni_cq_entry_t cq_event;
    gni_return_t rc;
    uint64_t instance;
    mca_btl_base_endpoint_t *endpoint;

    rc = GNI_CqGetEvent (btl->smsg_remote_cq, &cq_event);
    if (GNI_RC_NOT_DONE == rc) {
        return 0;
    }

    /* Failed call: the event is not inspected, only the return code. */
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        if (GNI_RC_ERROR_RESOURCE != rc) {
            goto unhandled;
        }

        /* recover from smsg cq overrun */
        return mca_btl_ugni_handle_remote_smsg_overrun (btl);
    }

    /* rc is GNI_RC_SUCCESS from here on. An overrun flag takes precedence
       over a bad status, so it is tested first. */
    if (OPAL_UNLIKELY(GNI_CQ_OVERRUN(cq_event))) {
        /* recover from smsg cq overrun */
        return mca_btl_ugni_handle_remote_smsg_overrun (btl);
    }

    if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(cq_event))) {
        goto unhandled;
    }

    BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64
                 ". ok = %d, type = %" PRIu64 "\n", (uint64_t) cq_event,
                 GNI_CQ_GET_MSG_ID(cq_event), GNI_CQ_STATUS_OK(cq_event),
                 GNI_CQ_GET_TYPE(cq_event)));

    instance = GNI_CQ_GET_INST_ID(cq_event);
    endpoint = btl->endpoints[instance & 0xffffffff];

    if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTED != endpoint->state)) {
        /* due to the nature of datagrams we may get a smsg completion before
           we get mailbox info from the peer */
        BTL_VERBOSE(("event occurred on an unconnected endpoint! ep state = %d", endpoint->state));
        return 0;
    }

    return mca_btl_ugni_smsg_process (endpoint);

unhandled:
    BTL_ERROR(("unhandled error in GNI_CqGetEvent"));

    /* unhandled error: crash */
    assert (0);
    return ompi_common_rc_ugni_to_ompi (rc);
}