/*
 * Drain ci->recv_evd_handle.
 *
 * Events are fetched one at a time until the queue reports
 * DAT_QUEUE_EMPTY or the fetch fails with an unexpected return type.
 * DTO completions carrying a non-NULL user cookie are handed to
 * do_DTO_COMPLETION_EVENT(); completions with a NULL cookie only
 * decrement ci->outstanding_cq_entries (never below zero).
 */
static
void psdapl_flush_evd(psdapl_con_info_t *ci)
{
	for (;;) {
		DAT_EVENT event;
		DAT_COUNT nmore = 0;
		DAT_RETURN dat_rc;

#if 0
		dat_rc = dat_evd_wait(ci->recv_evd_handle,
				      0 /*timeout in usec*/,
				      1 /* threshold */,
				      &event, &nmore);
#else
		/* Non-blocking fetch; assume more events may follow. */
		dat_rc = dat_evd_dequeue(ci->recv_evd_handle, &event);
		nmore = 1;
#endif

		switch (DAT_GET_TYPE(dat_rc)) {
		case DAT_QUEUE_EMPTY:
			/* Nothing left to read. */
			return;

		case DAT_TIMEOUT_EXPIRED:
			ci->outstanding_cq_entries = 0;
			psdapl_stat.timeouts++;
			break;

		case DAT_SUCCESS:
			if (event.event_number != DAT_DTO_COMPLETION_EVENT) {
				psdapl_dprint(1, "psdapl_flush_evd: unexpected event 0x%x. nmore:%d",
					      (unsigned)event.event_number, nmore);
			} else if (event.event_data.dto_completion_event_data.user_cookie.as_ptr) {
				do_DTO_COMPLETION_EVENT(ci, &event.event_data.dto_completion_event_data);
			} else if (ci->outstanding_cq_entries) {
				/* From sendv */
				ci->outstanding_cq_entries--;
			}
			break;

		default:
			/* Unexpected return type: report it and stop draining. */
			nmore = 0;
			psdapl_dprint_dat_err(1, dat_rc, "psdapl_flush_evd: dat_evd_wait(). nmore:%d", nmore);
			return;
		}

		if (!nmore) {
			return;
		}
	}
}
int mca_btl_udapl_component_progress() { mca_btl_udapl_module_t* btl; static int32_t inprogress = 0; DAT_EVENT event; size_t i; int32_t j, rdma_ep_count; int count = 0, btl_ownership; mca_btl_udapl_frag_t* frag; mca_btl_base_endpoint_t* endpoint; /* prevent deadlock - only one thread should be 'progressing' at a time */ if(OPAL_THREAD_ADD32(&inprogress, 1) > 1) { OPAL_THREAD_ADD32(&inprogress, -1); return OMPI_SUCCESS; } /* check for work to do on each uDAPL btl */ OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); for(i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++) { btl = mca_btl_udapl_component.udapl_btls[i]; /* Check DTO EVD */ while(DAT_SUCCESS == dat_evd_dequeue(btl->udapl_evd_dto, &event)) { DAT_DTO_COMPLETION_EVENT_DATA* dto; switch(event.event_number) { case DAT_DTO_COMPLETION_EVENT: dto = &event.event_data.dto_completion_event_data; frag = dto->user_cookie.as_ptr; /* Was the DTO successful? */ if(DAT_DTO_SUCCESS != dto->status) { if (DAT_DTO_ERR_FLUSHED == dto->status) { BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM, ("DAT_DTO_ERR_FLUSHED: probably OK if occurs during MPI_Finalize().\n")); } else { BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, ("ERROR: DAT_DTO_COMPLETION_EVENT: %d %d %lu %p.\n", dto->status, frag->type, (unsigned long)frag->size, dto->ep_handle)); } return OMPI_ERROR; } endpoint = frag->endpoint; btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); switch(frag->type) { case MCA_BTL_UDAPL_RDMA_WRITE: { assert(frag->base.des_src == &frag->segment); assert(frag->base.des_src_cnt == 1); assert(frag->base.des_dst == NULL); assert(frag->base.des_dst_cnt == 0); assert(frag->type == MCA_BTL_UDAPL_RDMA_WRITE); frag->base.des_cbfunc(&btl->super, endpoint, &frag->base, OMPI_SUCCESS); if( btl_ownership ) { mca_btl_udapl_free(&btl->super, &frag->base); } OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[BTL_UDAPL_EAGER_CONNECTION]), 1); mca_btl_udapl_frag_progress_pending(btl, endpoint, BTL_UDAPL_EAGER_CONNECTION); break; } 
case MCA_BTL_UDAPL_SEND: { int connection = BTL_UDAPL_EAGER_CONNECTION; assert(frag->base.des_src == &frag->segment); assert(frag->base.des_src_cnt == 1); assert(frag->base.des_dst == NULL); assert(frag->base.des_dst_cnt == 0); assert(frag->type == MCA_BTL_UDAPL_SEND); if(frag->size != mca_btl_udapl_component.udapl_eager_frag_size) { assert(frag->size == mca_btl_udapl_component.udapl_max_frag_size); connection = BTL_UDAPL_MAX_CONNECTION; } frag->base.des_cbfunc(&btl->super, endpoint, &frag->base, OMPI_SUCCESS); if( btl_ownership ) { mca_btl_udapl_free(&btl->super, &frag->base); } OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[connection]), 1); mca_btl_udapl_frag_progress_pending(btl, endpoint, connection); break; } case MCA_BTL_UDAPL_RECV: { mca_btl_active_message_callback_t* reg; int cntrl_msg = -1; assert(frag->base.des_dst == &frag->segment); assert(frag->base.des_dst_cnt == 1); assert(frag->base.des_src == NULL); assert(frag->base.des_src_cnt == 0); assert(frag->type == MCA_BTL_UDAPL_RECV); assert(frag->triplet.virtual_address == (DAT_VADDR)(uintptr_t)frag->segment.seg_addr.pval); assert(frag->triplet.segment_length == frag->size); assert(frag->btl == btl); /* setup frag ftr location and do callback */ frag->segment.seg_len = dto->transfered_length - sizeof(mca_btl_udapl_footer_t); frag->ftr = (mca_btl_udapl_footer_t *) ((char *)frag->segment.seg_addr.pval + frag->segment.seg_len); cntrl_msg = frag->ftr->tag; reg = mca_btl_base_active_message_trigger + frag->ftr->tag; OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); reg->cbfunc(&btl->super, frag->ftr->tag, &frag->base, reg->cbdata); OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); /* Repost the frag */ frag->ftr = frag->segment.seg_addr.pval; frag->segment.seg_len = (frag->size - sizeof(mca_btl_udapl_footer_t) - sizeof(mca_btl_udapl_rdma_footer_t)); frag->base.des_flags = 0; if(frag->size == mca_btl_udapl_component.udapl_eager_frag_size) { 
OPAL_THREAD_ADD32(&(frag->endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION]), 1); dat_ep_post_recv(frag->endpoint->endpoint_eager, 1, &frag->triplet, dto->user_cookie, DAT_COMPLETION_DEFAULT_FLAG); if (frag->endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION] >= mca_btl_udapl_component.udapl_sr_win) { mca_btl_udapl_endpoint_send_sr_credits(frag->endpoint, BTL_UDAPL_EAGER_CONNECTION); } if (MCA_BTL_TAG_UDAPL == cntrl_msg) { mca_btl_udapl_frag_progress_pending(btl, frag->endpoint, BTL_UDAPL_EAGER_CONNECTION); } } else { assert(frag->size == mca_btl_udapl_component.udapl_max_frag_size); OPAL_THREAD_ADD32(&(frag->endpoint->endpoint_sr_credits[BTL_UDAPL_MAX_CONNECTION]), 1); dat_ep_post_recv(frag->endpoint->endpoint_max, 1, &frag->triplet, dto->user_cookie, DAT_COMPLETION_DEFAULT_FLAG); if (frag->endpoint->endpoint_sr_credits[BTL_UDAPL_MAX_CONNECTION] >= mca_btl_udapl_component.udapl_sr_win) { mca_btl_udapl_endpoint_send_sr_credits(frag->endpoint, BTL_UDAPL_MAX_CONNECTION); } if (MCA_BTL_TAG_UDAPL == cntrl_msg) { mca_btl_udapl_frag_progress_pending(btl, frag->endpoint, BTL_UDAPL_MAX_CONNECTION); } } break; } case MCA_BTL_UDAPL_PUT: { assert(frag->base.des_src == &frag->segment); assert(frag->base.des_src_cnt == 1); assert(frag->base.des_dst_cnt == 1); assert(frag->type == MCA_BTL_UDAPL_PUT); frag->base.des_cbfunc(&btl->super, endpoint, &frag->base, OMPI_SUCCESS); if( btl_ownership ) { mca_btl_udapl_free(&btl->super, &frag->base); } OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION]), 1); OPAL_THREAD_ADD32(&(endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION]), 1); mca_btl_udapl_frag_progress_pending(btl, endpoint, BTL_UDAPL_MAX_CONNECTION); break; } case MCA_BTL_UDAPL_CONN_RECV: mca_btl_udapl_endpoint_finish_connect(btl, frag->segment.seg_addr.pval, (int32_t *)((char *)frag->segment.seg_addr.pval + sizeof(mca_btl_udapl_addr_t)), event.event_data.connect_event_data.ep_handle); /* No break - fall through to free */ case 
MCA_BTL_UDAPL_CONN_SEND: frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit; mca_btl_udapl_free(&btl->super, &frag->base); break; default: BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE, ("WARNING: unknown frag type: %d\n", frag->type)); } count++; break; default: BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE, ("WARNING: DTO event: %s (%d)\n", mca_btl_udapl_dat_event_to_string(event.event_number), event.event_number)); } } /* Check connection EVD */ while((btl->udapl_connect_inprogress > 0) && (DAT_SUCCESS == dat_evd_dequeue(btl->udapl_evd_conn, &event))) { switch(event.event_number) { case DAT_CONNECTION_REQUEST_EVENT: /* Accept a new connection */ mca_btl_udapl_accept_connect(btl, event.event_data.cr_arrival_event_data.cr_handle); count++; break; case DAT_CONNECTION_EVENT_ESTABLISHED: /* Both the client and server side of a connection generate this event */ if (mca_btl_udapl_component.udapl_conn_priv_data) { /* private data is only valid at this point if this * event is from a dat_ep_connect call, not an accept */ mca_btl_udapl_endpoint_pd_established_conn(btl, event.event_data.connect_event_data.ep_handle); } else { /* explicitly exchange process data */ mca_btl_udapl_sendrecv(btl, event.event_data.connect_event_data.ep_handle); } count++; break; case DAT_CONNECTION_EVENT_PEER_REJECTED: case DAT_CONNECTION_EVENT_NON_PEER_REJECTED: case DAT_CONNECTION_EVENT_ACCEPT_COMPLETION_ERROR: case DAT_CONNECTION_EVENT_DISCONNECTED: case DAT_CONNECTION_EVENT_BROKEN: case DAT_CONNECTION_EVENT_TIMED_OUT: /* handle this case specially? if we have finite timeout, we might want to try connecting again here. 
*/ case DAT_CONNECTION_EVENT_UNREACHABLE: /* Need to set the BTL endpoint to MCA_BTL_UDAPL_FAILED See dat_ep_connect documentation pdf pg 198 */ BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, ("WARNING: connection event not handled : %s (%d)\n", mca_btl_udapl_dat_event_to_string(event.event_number), event.event_number)); break; default: BTL_ERROR(("ERROR: connection event : %s (%d)", mca_btl_udapl_dat_event_to_string(event.event_number), event.event_number)); } } /* Check async EVD */ if (btl->udapl_async_events == mca_btl_udapl_component.udapl_async_events) { btl->udapl_async_events = 0; while(DAT_SUCCESS == dat_evd_dequeue(btl->udapl_evd_async, &event)) { switch(event.event_number) { case DAT_ASYNC_ERROR_EVD_OVERFLOW: case DAT_ASYNC_ERROR_IA_CATASTROPHIC: case DAT_ASYNC_ERROR_EP_BROKEN: case DAT_ASYNC_ERROR_TIMED_OUT: case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR: BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, ("WARNING: async event ignored : %s (%d)", mca_btl_udapl_dat_event_to_string(event.event_number), event.event_number)); break; default: BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, ("WARNING: %s (%d)\n", mca_btl_udapl_dat_event_to_string(event.event_number), event.event_number)); } } } else { btl->udapl_async_events++; } /* * Check eager rdma segments */ /* find the number of endpoints with rdma buffers */ rdma_ep_count = btl->udapl_eager_rdma_endpoint_count; for (j = 0; j < rdma_ep_count; j++) { mca_btl_udapl_endpoint_t* endpoint; mca_btl_udapl_frag_t *local_rdma_frag; endpoint = opal_pointer_array_get_item(btl->udapl_eager_rdma_endpoints, j); OPAL_THREAD_LOCK(&endpoint->endpoint_eager_rdma_local.lock); local_rdma_frag = MCA_BTL_UDAPL_GET_LOCAL_RDMA_FRAG(endpoint, endpoint->endpoint_eager_rdma_local.head); if (local_rdma_frag->rdma_ftr->active == 1) { int pad = 0; mca_btl_active_message_callback_t* reg; MCA_BTL_UDAPL_RDMA_NEXT_INDEX(endpoint->endpoint_eager_rdma_local.head); OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock); /* compute pad as needed */ 
MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad, (local_rdma_frag->rdma_ftr->size + sizeof(mca_btl_udapl_footer_t))); /* set fragment information */ local_rdma_frag->ftr = (mca_btl_udapl_footer_t *) ((char *)local_rdma_frag->rdma_ftr - pad - sizeof(mca_btl_udapl_footer_t)); local_rdma_frag->segment.seg_len = local_rdma_frag->rdma_ftr->size; local_rdma_frag->segment.seg_addr.pval = (unsigned char *) ((char *)local_rdma_frag->ftr - local_rdma_frag->segment.seg_len); /* trigger callback */ reg = mca_btl_base_active_message_trigger + local_rdma_frag->ftr->tag; reg->cbfunc(&btl->super, local_rdma_frag->ftr->tag, &local_rdma_frag->base, reg->cbdata); /* repost */ local_rdma_frag->rdma_ftr->active = 0; local_rdma_frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit; local_rdma_frag->base.des_flags = 0; /* increment local rdma credits */ OPAL_THREAD_ADD32(&(endpoint->endpoint_eager_rdma_local.credits), 1); if (endpoint->endpoint_eager_rdma_local.credits >= mca_btl_udapl_component.udapl_eager_rdma_win) { mca_btl_udapl_endpoint_send_eager_rdma_credits(endpoint); } count++; } else { OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock); } } /* end of rdma_count loop */ } /* unlock and return */ OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); OPAL_THREAD_ADD32(&inprogress, -1); return count; }