Beispiel #1
0
/*
 * Read all events from recv_evd_handle.
 *
 * Dequeues events from ci->recv_evd_handle one at a time and dispatches
 * them until dat_evd_dequeue() reports DAT_QUEUE_EMPTY or returns an
 * unexpected status.
 *
 * ci: connection whose recv_evd_handle is polled. Its
 *     outstanding_cq_entries counter is updated for cookie-less
 *     DTO completions.
 */
static
void psdapl_flush_evd(psdapl_con_info_t *ci)
{
	for (;;) {
		DAT_EVENT event;
		/* Non-zero: keep dequeuing. Cleared on empty queue or error. */
		DAT_COUNT nmore = 1;
		DAT_RETURN dat_rc = dat_evd_dequeue(ci->recv_evd_handle, &event);

		switch (DAT_GET_TYPE(dat_rc)) {
		case DAT_TIMEOUT_EXPIRED:
			/* Counted as a timeout; the loop keeps polling afterwards. */
			ci->outstanding_cq_entries = 0;
			psdapl_stat.timeouts++;
			break;
		case DAT_SUCCESS:
			switch (event.event_number) {
			case DAT_DTO_COMPLETION_EVENT:
				if (!event.event_data.dto_completion_event_data.user_cookie.as_ptr) {
					/* No cookie: completion originates from sendv. */
					if (ci->outstanding_cq_entries) {
						ci->outstanding_cq_entries--;
					}
				} else {
					do_DTO_COMPLETION_EVENT(ci, &event.event_data.dto_completion_event_data);
				}
				break;
			default:
				psdapl_dprint(1, "psdapl_flush_evd: unexpected event 0x%x. nmore:%d",
					      (unsigned)event.event_number, nmore);
				break;
			}
			break;
		case DAT_QUEUE_EMPTY:
			/* Nothing left to read: leave the loop. */
			nmore = 0;
			break;
		default:
			/* Any other status is an error: report it and stop flushing. */
			nmore = 0;
			psdapl_dprint_dat_err(1, dat_rc, "psdapl_flush_evd: dat_evd_dequeue(). nmore:%d",
					      nmore);
			break;
		}

		if (!nmore) break;
	}
}
/*
 * Progress function of the uDAPL BTL component.
 *
 * For every uDAPL BTL module this
 *   1. drains the DTO event dispatcher and completes/reposts fragments,
 *   2. drains the connection event dispatcher while connections are in
 *      progress,
 *   3. every udapl_async_events-th call drains the async event dispatcher,
 *   4. polls the head eager-RDMA fragment of each RDMA-enabled endpoint.
 *
 * Only one thread progresses at a time; a concurrent caller returns
 * immediately.
 *
 * Returns the number of events handled, OMPI_SUCCESS when another thread
 * is already progressing, or OMPI_ERROR when a DTO completed with a
 * non-success status.
 */
int mca_btl_udapl_component_progress()
{
    mca_btl_udapl_module_t* btl;
    static int32_t inprogress = 0;
    DAT_EVENT event;
    size_t i;
    int32_t j, rdma_ep_count;
    int count = 0, btl_ownership;
    mca_btl_udapl_frag_t* frag;
    mca_btl_base_endpoint_t* endpoint;

    /* prevent deadlock - only one thread should be 'progressing' at a time */
    if(OPAL_THREAD_ADD32(&inprogress, 1) > 1) {
        OPAL_THREAD_ADD32(&inprogress, -1);
        return OMPI_SUCCESS;
    }

    /* check for work to do on each uDAPL btl */
    OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
    for(i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++) {
        btl = mca_btl_udapl_component.udapl_btls[i];

        /* Check DTO EVD */
        while(DAT_SUCCESS ==
                dat_evd_dequeue(btl->udapl_evd_dto, &event)) {
            DAT_DTO_COMPLETION_EVENT_DATA* dto;

            switch(event.event_number) {
            case DAT_DTO_COMPLETION_EVENT:
                dto = &event.event_data.dto_completion_event_data;

                /* the cookie carries the fragment this DTO was posted for */
                frag = dto->user_cookie.as_ptr;

                /* Was the DTO successful? */
                if(DAT_DTO_SUCCESS != dto->status) {

                    if (DAT_DTO_ERR_FLUSHED == dto->status) {

                        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
                            ("DAT_DTO_ERR_FLUSHED: probably OK if occurs during MPI_Finalize().\n"));
                    } else {

                        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
                            ("ERROR: DAT_DTO_COMPLETION_EVENT: %d %d %lu %p.\n",
                                dto->status, frag->type,
                                (unsigned long)frag->size, dto->ep_handle));
                    }

                    /* Release the component lock and the progress guard
                     * before bailing out. Otherwise the lock stays held
                     * and every later call sees inprogress > 1 and
                     * returns without doing any work. */
                    OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);
                    OPAL_THREAD_ADD32(&inprogress, -1);
                    return OMPI_ERROR;
                }
                endpoint = frag->endpoint;
                /* sampled before the callback runs, since the callback
                 * receives the descriptor */
                btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

                switch(frag->type) {
                case MCA_BTL_UDAPL_RDMA_WRITE:
                {
                    assert(frag->base.des_src == &frag->segment);
                    assert(frag->base.des_src_cnt == 1);
                    assert(frag->base.des_dst == NULL);
                    assert(frag->base.des_dst_cnt == 0);
                    assert(frag->type == MCA_BTL_UDAPL_RDMA_WRITE);

                    frag->base.des_cbfunc(&btl->super, endpoint,
                        &frag->base, OMPI_SUCCESS);
                    if( btl_ownership ) {
                        mca_btl_udapl_free(&btl->super,
                            &frag->base);
                    }

                    /* return the work queue token on the eager connection */
                    OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[BTL_UDAPL_EAGER_CONNECTION]), 1);

                    mca_btl_udapl_frag_progress_pending(btl,
                        endpoint, BTL_UDAPL_EAGER_CONNECTION);

                    break;
                }
                case MCA_BTL_UDAPL_SEND:
                {
                    int connection = BTL_UDAPL_EAGER_CONNECTION;

                    assert(frag->base.des_src == &frag->segment);
                    assert(frag->base.des_src_cnt == 1);
                    assert(frag->base.des_dst == NULL);
                    assert(frag->base.des_dst_cnt == 0);
                    assert(frag->type == MCA_BTL_UDAPL_SEND);

                    /* the fragment size selects the connection it was
                     * sent on: eager-sized or max-sized */
                    if(frag->size !=
                            mca_btl_udapl_component.udapl_eager_frag_size) {
                        assert(frag->size ==
                            mca_btl_udapl_component.udapl_max_frag_size);

                        connection = BTL_UDAPL_MAX_CONNECTION;
                    }
                    frag->base.des_cbfunc(&btl->super, endpoint,
                            &frag->base, OMPI_SUCCESS);
                    if( btl_ownership ) {
                        mca_btl_udapl_free(&btl->super,
                            &frag->base);
                    }

                    OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[connection]), 1);

                    mca_btl_udapl_frag_progress_pending(btl,
                        endpoint, connection);
                    break;
                }
                case MCA_BTL_UDAPL_RECV:
                {
                    mca_btl_active_message_callback_t* reg;
                    int cntrl_msg = -1;

                    assert(frag->base.des_dst == &frag->segment);
                    assert(frag->base.des_dst_cnt == 1);
                    assert(frag->base.des_src == NULL);
                    assert(frag->base.des_src_cnt == 0);
                    assert(frag->type == MCA_BTL_UDAPL_RECV);
                    assert(frag->triplet.virtual_address ==
                            (DAT_VADDR)(uintptr_t)frag->segment.seg_addr.pval);
                    assert(frag->triplet.segment_length == frag->size);
                    assert(frag->btl == btl);

                    /* setup frag ftr location and do callback */
                    frag->segment.seg_len = dto->transfered_length -
                        sizeof(mca_btl_udapl_footer_t);
                    frag->ftr = (mca_btl_udapl_footer_t *)
                        ((char *)frag->segment.seg_addr.pval +
                        frag->segment.seg_len);

                    /* remember the tag: the footer pointer is reset
                     * before the tag is checked again below */
                    cntrl_msg = frag->ftr->tag;

                    reg = mca_btl_base_active_message_trigger + frag->ftr->tag;

                    /* the component lock is dropped while the receive
                     * callback runs and re-taken afterwards */
                    OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);

                    reg->cbfunc(&btl->super,
                            frag->ftr->tag, &frag->base, reg->cbdata);
                    OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);

                    /* Repost the frag */
                    frag->ftr = frag->segment.seg_addr.pval;
                    frag->segment.seg_len =
                        (frag->size - sizeof(mca_btl_udapl_footer_t) -
                            sizeof(mca_btl_udapl_rdma_footer_t));
                    frag->base.des_flags = 0;

                    if(frag->size ==
                              mca_btl_udapl_component.udapl_eager_frag_size) {

                        OPAL_THREAD_ADD32(&(frag->endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION]), 1);

                        dat_ep_post_recv(frag->endpoint->endpoint_eager,
                            1, &frag->triplet, dto->user_cookie,
                            DAT_COMPLETION_DEFAULT_FLAG);

                        /* enough credits accumulated: send them to the peer */
                        if (frag->endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION] >=
                            mca_btl_udapl_component.udapl_sr_win) {
                            mca_btl_udapl_endpoint_send_sr_credits(frag->endpoint,
                                BTL_UDAPL_EAGER_CONNECTION);
                        }

                        if (MCA_BTL_TAG_UDAPL == cntrl_msg) {
                            mca_btl_udapl_frag_progress_pending(btl,
                                frag->endpoint,
                                BTL_UDAPL_EAGER_CONNECTION);
                        }

                    } else {
                        assert(frag->size ==
                            mca_btl_udapl_component.udapl_max_frag_size);

                        OPAL_THREAD_ADD32(&(frag->endpoint->endpoint_sr_credits[BTL_UDAPL_MAX_CONNECTION]), 1);

                        dat_ep_post_recv(frag->endpoint->endpoint_max,
                            1, &frag->triplet, dto->user_cookie,
                            DAT_COMPLETION_DEFAULT_FLAG);

                        if (frag->endpoint->endpoint_sr_credits[BTL_UDAPL_MAX_CONNECTION] >=
                            mca_btl_udapl_component.udapl_sr_win) {
                            mca_btl_udapl_endpoint_send_sr_credits(frag->endpoint,
                                BTL_UDAPL_MAX_CONNECTION);
                        }

                        if (MCA_BTL_TAG_UDAPL == cntrl_msg) {
                            mca_btl_udapl_frag_progress_pending(btl,
                                frag->endpoint,
                                BTL_UDAPL_MAX_CONNECTION);
                        }
                    }

                    break;
                }
                case MCA_BTL_UDAPL_PUT:
                {
                    assert(frag->base.des_src == &frag->segment);
                    assert(frag->base.des_src_cnt == 1);
                    assert(frag->base.des_dst_cnt == 1);
                    assert(frag->type == MCA_BTL_UDAPL_PUT);

                    frag->base.des_cbfunc(&btl->super, endpoint,
                        &frag->base, OMPI_SUCCESS);
                    if( btl_ownership ) {
                        mca_btl_udapl_free(&btl->super,
                            &frag->base);
                    }

                    OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION]), 1);
                    OPAL_THREAD_ADD32(&(endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION]), 1);

                    mca_btl_udapl_frag_progress_pending(btl,
                        endpoint, BTL_UDAPL_MAX_CONNECTION);

                    break;
                }
                case MCA_BTL_UDAPL_CONN_RECV:
                    /* NOTE(review): ep_handle is read through
                     * connect_event_data although this is a DTO
                     * completion event; presumably both event structs
                     * start with ep_handle - confirm against dat.h */
                    mca_btl_udapl_endpoint_finish_connect(btl,
                            frag->segment.seg_addr.pval,
                            (int32_t *)((char *)frag->segment.seg_addr.pval  +
                                sizeof(mca_btl_udapl_addr_t)),
                            event.event_data.connect_event_data.ep_handle);
                    /* No break - fall through to free */
                case MCA_BTL_UDAPL_CONN_SEND:
                    frag->segment.seg_len =
                            mca_btl_udapl_module.super.btl_eager_limit;
                    mca_btl_udapl_free(&btl->super, &frag->base);
                    break;
                default:
                    BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
                        ("WARNING: unknown frag type: %d\n",
                        frag->type));
                }
                count++;
                break;
            default:
                BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE,
                    ("WARNING: DTO event: %s (%d)\n",
                    mca_btl_udapl_dat_event_to_string(event.event_number),
                    event.event_number));
            }
        }

        /* Check connection EVD (only polled while connections are pending) */
        while((btl->udapl_connect_inprogress > 0) && (DAT_SUCCESS ==
            dat_evd_dequeue(btl->udapl_evd_conn, &event))) {

            switch(event.event_number) {
                case DAT_CONNECTION_REQUEST_EVENT:
                    /* Accept a new connection */
                    mca_btl_udapl_accept_connect(btl,
                            event.event_data.cr_arrival_event_data.cr_handle);
                    count++;
                    break;
                case DAT_CONNECTION_EVENT_ESTABLISHED:
                    /* Both the client and server side of a connection generate
                       this event */
                    if (mca_btl_udapl_component.udapl_conn_priv_data) {
                        /* private data is only valid at this point if this
                         * event is from a dat_ep_connect call, not an accept
                         */
                        mca_btl_udapl_endpoint_pd_established_conn(btl,
                            event.event_data.connect_event_data.ep_handle);
                    } else {
                        /* explicitly exchange process data */
                        mca_btl_udapl_sendrecv(btl,
                            event.event_data.connect_event_data.ep_handle);
                    }
                    count++;
                    break;
                case DAT_CONNECTION_EVENT_PEER_REJECTED:
                case DAT_CONNECTION_EVENT_NON_PEER_REJECTED:
                case DAT_CONNECTION_EVENT_ACCEPT_COMPLETION_ERROR:
                case DAT_CONNECTION_EVENT_DISCONNECTED:
                case DAT_CONNECTION_EVENT_BROKEN:
                case DAT_CONNECTION_EVENT_TIMED_OUT:
                    /* handle this case specially? if we have finite timeout,
                       we might want to try connecting again here. */
                case DAT_CONNECTION_EVENT_UNREACHABLE:
                    /* Need to set the BTL endpoint to MCA_BTL_UDAPL_FAILED
                       See dat_ep_connect documentation pdf pg 198 */
                    BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
                        ("WARNING: connection event not handled : %s (%d)\n",
                        mca_btl_udapl_dat_event_to_string(event.event_number),
                        event.event_number));
                    break;
                default:
                    BTL_ERROR(("ERROR: connection event : %s (%d)",
                        mca_btl_udapl_dat_event_to_string(event.event_number),
                        event.event_number));
            }
        }

        /* Check async EVD: only once the per-btl call counter reaches the
         * configured udapl_async_events value; otherwise just count the call */
        if (btl->udapl_async_events == mca_btl_udapl_component.udapl_async_events) {
            btl->udapl_async_events = 0;

            while(DAT_SUCCESS ==
                dat_evd_dequeue(btl->udapl_evd_async, &event)) {

                switch(event.event_number) {
                case DAT_ASYNC_ERROR_EVD_OVERFLOW:
                case DAT_ASYNC_ERROR_IA_CATASTROPHIC:
                case DAT_ASYNC_ERROR_EP_BROKEN:
                case DAT_ASYNC_ERROR_TIMED_OUT:
                case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR:
                    BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
                        ("WARNING: async event ignored : %s (%d)",
                        mca_btl_udapl_dat_event_to_string(event.event_number),
                        event.event_number));
                    break;
                default:
                    BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
                        ("WARNING: %s (%d)\n",
                        mca_btl_udapl_dat_event_to_string(event.event_number),
                        event.event_number));
                }
            }
        } else {
            btl->udapl_async_events++;
        }

        /*
         * Check eager rdma segments
         */

        /* find the number of endpoints with rdma buffers */
        rdma_ep_count = btl->udapl_eager_rdma_endpoint_count;

        for (j = 0; j < rdma_ep_count; j++) {
            mca_btl_udapl_endpoint_t* endpoint;
            mca_btl_udapl_frag_t *local_rdma_frag;

            endpoint =
                opal_pointer_array_get_item(btl->udapl_eager_rdma_endpoints, j);

            OPAL_THREAD_LOCK(&endpoint->endpoint_eager_rdma_local.lock);

            local_rdma_frag =
                MCA_BTL_UDAPL_GET_LOCAL_RDMA_FRAG(endpoint,
                    endpoint->endpoint_eager_rdma_local.head);

            /* an active footer marks the head fragment as holding data */
            if (local_rdma_frag->rdma_ftr->active == 1) {
                int pad = 0;
                mca_btl_active_message_callback_t* reg;

                /* advance head under the lock, then release it before
                 * touching the fragment contents */
                MCA_BTL_UDAPL_RDMA_NEXT_INDEX(endpoint->endpoint_eager_rdma_local.head);
                OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock);

                /* compute pad as needed */
                MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad,
                    (local_rdma_frag->rdma_ftr->size +
                        sizeof(mca_btl_udapl_footer_t)));

                /* set fragment information: the footer sits pad bytes in
                 * front of the rdma footer, the payload directly in front
                 * of the footer */
                local_rdma_frag->ftr = (mca_btl_udapl_footer_t *)
                    ((char *)local_rdma_frag->rdma_ftr -
                        pad -
                        sizeof(mca_btl_udapl_footer_t));
                local_rdma_frag->segment.seg_len =
                    local_rdma_frag->rdma_ftr->size;
                local_rdma_frag->segment.seg_addr.pval = (unsigned char *)
                    ((char *)local_rdma_frag->ftr -
                        local_rdma_frag->segment.seg_len);

                /* trigger callback */
                reg = mca_btl_base_active_message_trigger + local_rdma_frag->ftr->tag;
                reg->cbfunc(&btl->super,
                    local_rdma_frag->ftr->tag, &local_rdma_frag->base, reg->cbdata);

                /* repost */
                local_rdma_frag->rdma_ftr->active = 0;
                local_rdma_frag->segment.seg_len =
                    mca_btl_udapl_module.super.btl_eager_limit;
                local_rdma_frag->base.des_flags = 0;

                /* increment local rdma credits */
                OPAL_THREAD_ADD32(&(endpoint->endpoint_eager_rdma_local.credits),
                    1);

                if (endpoint->endpoint_eager_rdma_local.credits >=
                    mca_btl_udapl_component.udapl_eager_rdma_win) {
                    mca_btl_udapl_endpoint_send_eager_rdma_credits(endpoint);
                }

                count++;

            } else {
                OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock);
            }
        } /* end of rdma_count loop */
    }

    /* unlock and return */
    OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);
    OPAL_THREAD_ADD32(&inprogress, -1);
    return count;
}