static int ompi_comm_checkcid (ompi_comm_request_t *request)
{
    ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
    ompi_request_t *subreq;
    int ret;

    if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
        /* the cid lock is busy; reschedule this callback and try again later */
        return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, NULL, 0);
    }

    /* check whether the globally agreed cid matches the one we reserved locally */
    context->flag = (context->nextcid == context->nextlocal_cid);

    if (!context->flag) {
        /* release our local reservation and try to claim the agreed cid instead */
        opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL);

        context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators,
                                                              context->nextcid, context->comm);
    }

    ++context->iter;

    /* agree on whether every participant managed to claim the proposed cid */
    ret = context->allreduce_fn (&context->flag, &context->rflag, 1, MPI_MIN, context, &subreq);
    if (OMPI_SUCCESS == ret) {
        ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1);
    }

    OPAL_THREAD_UNLOCK(&ompi_cid_lock);

    return ret;
}
static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request)
{
    ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;

    if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
        return ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, NULL, 0);
    }

    if (1 == context->rflag) {
        /* set the corresponding values on the newcomm */
        context->newcomm->c_contextid = context->nextcid;
        opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, context->newcomm);

        /* unlock the cid generator */
        ompi_comm_cid_lowest_id = INT64_MAX;
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);

        /* done! */
        return OMPI_SUCCESS;
    }

    if (1 == context->flag) {
        /* we could have used this cid, but the other participants did not agree */
        opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, NULL);
        context->start = context->nextcid + 1; /* that's where we can start the next round */
    }

    ++context->iter;

    OPAL_THREAD_UNLOCK(&ompi_cid_lock);

    /* try again */
    return ompi_comm_allreduce_getnextcid (request);
}
static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
{
    ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
    int64_t my_id = ((int64_t) ompi_comm_get_cid (context->comm) << 32 | context->pml_tag);
    ompi_request_t *subreq;
    bool flag;
    int ret;

    if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
        return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
    }

    if (ompi_comm_cid_lowest_id < my_id) {
        /* an allocation with a lower id currently has priority; back off and retry later */
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
        return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
    }

    ompi_comm_cid_lowest_id = my_id;

    /**
     * This is the real algorithm described in the doc
     */
    /* reserve the lowest free slot in ompi_mpi_communicators as our local candidate */
    flag = false;
    context->nextlocal_cid = mca_pml.pml_max_contextid;
    for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) {
        flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, context->comm);
        if (true == flag) {
            context->nextlocal_cid = i;
            break;
        }
    }

    ret = context->allreduce_fn (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX,
                                 context, &subreq);
    if (OMPI_SUCCESS != ret) {
        ompi_comm_cid_lowest_id = INT64_MAX;
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
        return ret;
    }

    if ((unsigned int) context->nextlocal_cid == mca_pml.pml_max_contextid) {
        /* at least one peer ran out of CIDs */
        if (flag) {
            /* release the slot we reserved above */
            opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL);
        }

        ompi_comm_cid_lowest_id = INT64_MAX;
        OPAL_THREAD_UNLOCK(&ompi_cid_lock);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_UNLOCK(&ompi_cid_lock);

    /* next we want to verify that the resulting commid is ok */
    return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1);
}
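/*
 * Overview of the three callbacks above, which together implement the
 * non-blocking context-id (CID) negotiation: ompi_comm_allreduce_getnextcid()
 * serializes concurrent allocations through ompi_comm_cid_lowest_id, reserves
 * the lowest free slot in ompi_mpi_communicators, and starts a MAX allreduce
 * so that all participants converge on the same candidate;
 * ompi_comm_checkcid() then tries to claim that agreed candidate locally and
 * starts a MIN allreduce on the per-process success flag;
 * ompi_comm_nextcid_check_flag() either publishes the new communicator under
 * the agreed CID or releases the slot and restarts the search from
 * nextcid + 1.  Whenever a callback cannot take ompi_cid_lock, it re-schedules
 * itself via ompi_comm_request_schedule_append(), which keeps the whole
 * exchange non-blocking.
 */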
/* the progress function to be called from the opal progress function */
int bcol_basesmuma_progress(void)
{
    /* local variables */
    volatile int32_t *cntr;
    mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;

    /* check to see if release of memory blocks needs to be done */
    if( opal_list_get_size(&(cs->nb_admin_barriers)) ) {
        sm_nbbar_desc_t *item_ptr;
        opal_list_t *list = &(cs->nb_admin_barriers);

        /* process only if the list is non-empty */
        if( !OPAL_THREAD_TRYLOCK(&cs->nb_admin_barriers_mutex) ) {

            for (item_ptr = (sm_nbbar_desc_t*) opal_list_get_first(list);
                 item_ptr != (sm_nbbar_desc_t*) opal_list_get_end(list);
                 item_ptr = (sm_nbbar_desc_t*) opal_list_get_next(item_ptr) )
            {
                bcol_basesmuma_rd_nb_barrier_progress_admin(item_ptr);

                /* check to see if this barrier is complete */
                if( NB_BARRIER_DONE == item_ptr->collective_phase ) {
                    /* barrier is complete - remove from the list.  No need
                     * to put it on another list, as it is part of the memory
                     * bank control structure, and will be picked up
                     * again when needed. */
                    int index = item_ptr->pool_index;

                    /* old way - ctl_struct specific */
                    /*
                    volatile uint64_t *cntr= (volatile uint64_t *)
                        &(item_ptr->sm_module->colls_no_user_data.
                          ctl_buffs_mgmt[index].bank_gen_counter);
                    */
                    cntr = (volatile int32_t *) &(item_ptr->coll_buff->
                                                  ctl_buffs_mgmt[index].bank_gen_counter);
                    item_ptr = (sm_nbbar_desc_t*) opal_list_remove_item((opal_list_t *)list,
                                                                        (opal_list_item_t *)item_ptr);

                    /* increment the generation number */
                    OPAL_THREAD_ADD32(cntr, 1);
                }
            }

            OPAL_THREAD_UNLOCK(&cs->nb_admin_barriers_mutex);
        }
    }

    return OMPI_SUCCESS;
}
/*
 * Elan4 component progress.
 */
int mca_btl_elan_component_progress( void )
{
    int num_progressed = 0, i;

    for( i = 0; i < (int)mca_btl_elan_component.elan_num_btls; i++ ) {
        mca_btl_elan_module_t* elan_btl = mca_btl_elan_component.elan_btls[i];

        /* This is a fast receive over the queue */
        if( elan_queueRxPoll( elan_btl->rx_queue, 0 ) ) {
            mca_btl_active_message_callback_t* reg;
            mca_btl_elan_hdr_t* elan_hdr = NULL;
            mca_btl_elan_frag_t frag;

            elan_hdr = (mca_btl_elan_hdr_t*)elan_queueRxWait( elan_btl->rx_queue, NULL, 0 );
            frag.base.des_dst = &frag.segment;
            frag.base.des_dst->seg_addr.pval = (void*)(elan_hdr+1);
            frag.base.des_dst->seg_len = (size_t)elan_hdr->length;
            frag.base.des_dst_cnt = 1;
            frag.tag = (mca_btl_base_tag_t)elan_hdr->tag;
            frag.size = elan_hdr->length;

            reg = mca_btl_base_active_message_trigger + frag.tag;
            reg->cbfunc( &(elan_btl->super), frag.tag, &(frag.base), reg->cbdata );

            elan_queueRxComplete( elan_btl->rx_queue );
            num_progressed++;
        }

        /* This is the slower receive over the tport */
        if( elan_btl->expect_tport_recv && !OPAL_THREAD_TRYLOCK(&elan_btl->elan_lock) ) {
            mca_btl_elan_frag_t* frag =
                (mca_btl_elan_frag_t*)opal_list_get_first( &(elan_btl->recv_list) );

            if( elan_done(frag->elan_event, 0) ) {
                int tag;
                size_t length;
                mca_btl_active_message_callback_t* reg;
                void* recv_buf;

                recv_buf = (mca_btl_elan_hdr_t*)elan_tportRxWait( frag->elan_event,
                                                                  NULL, &tag, &length );
                num_progressed++;
                /*elan_btl->expect_tport_recv--;*/
                opal_list_remove_first( &(elan_btl->recv_list) );
                OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);

                frag->base.des_dst->seg_addr.pval = (void*)recv_buf;
                frag->base.des_dst->seg_len = length;
                frag->tag = (mca_btl_base_tag_t)tag;
                reg = mca_btl_base_active_message_trigger + frag->tag;
                reg->cbfunc( &(elan_btl->super), frag->tag, &(frag->base), reg->cbdata );

                if( recv_buf != (void*)(frag+1) ) {
                    elan_tportBufFree( elan_btl->tport, recv_buf );
                    frag->base.des_dst->seg_addr.pval = (void*)(frag+1);
                }
                frag->elan_event = elan_tportRxStart( elan_btl->tport,
                                                      ELAN_TPORT_RXBUF | ELAN_TPORT_RXANY,
                                                      0, 0, 0, 0,
                                                      frag->base.des_dst->seg_addr.pval,
                                                      mca_btl_elan_module.super.btl_eager_limit );
                OPAL_THREAD_LOCK(&elan_btl->elan_lock);
                opal_list_append( &(elan_btl->recv_list), (opal_list_item_t*)frag );
            }
            OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);
        }

        /* If there are any pending sends check their completion */
      recheck_send_list:
        if( !opal_list_is_empty( &(elan_btl->send_list) ) &&
            !OPAL_THREAD_TRYLOCK(&elan_btl->elan_lock) ) {
            mca_btl_elan_frag_t* frag =
                (mca_btl_elan_frag_t*)opal_list_get_first( &(elan_btl->send_list) );

            if( (NULL != frag) && elan_poll(frag->elan_event, 0) ) {
                int btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

                opal_list_remove_first( &(elan_btl->send_list) );
                OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);
                num_progressed++;

                frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint,
                                       &(frag->base), OMPI_SUCCESS );
                if( btl_ownership ) {
                    MCA_BTL_ELAN_FRAG_RETURN(frag);
                }
                goto recheck_send_list;
            } else {
                OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);
            }
        }

      recheck_rdma_list:
        /* If any RDMA have been posted, check their status */
        if( !opal_list_is_empty( &(elan_btl->rdma_list) ) &&
            !OPAL_THREAD_TRYLOCK(&elan_btl->elan_lock) ) {
            mca_btl_elan_frag_t* frag =
                (mca_btl_elan_frag_t*)opal_list_get_first( &(elan_btl->rdma_list) );

            if( (NULL != frag) && elan_poll(frag->elan_event, 0) ) {
                int btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

                opal_list_remove_first( &(elan_btl->rdma_list) );
                OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);
                num_progressed++;

                frag->base.des_cbfunc( &(elan_btl->super), frag->endpoint,
                                       &(frag->base), OMPI_SUCCESS );
                if( btl_ownership ) {
                    MCA_BTL_ELAN_FRAG_RETURN(frag);
                }
                goto recheck_rdma_list;
            } else {
                OPAL_THREAD_UNLOCK(&elan_btl->elan_lock);
            }
        }
    }

    return num_progressed;
}