/**
 * Count the RDMA BTLs of an endpoint that pass the pipeline filter.
 *
 * At most min(number of entries in btl_rdma, mca_pml_ob1.max_rdma_per_request)
 * entries are fetched with mca_bml_base_btl_array_get_next(). An entry is
 * counted when mca_pml_ob1.use_all_rdma is set, or when its btl_endpoint is
 * the same pointer as the btl_endpoint of some entry in btl_eager.
 *
 * @param bml_endpoint  endpoint whose btl_rdma / btl_eager arrays are examined
 * @return number of entries that passed the filter
 */
size_t mca_pml_ob1_rdma_pipeline_btls_count (mca_bml_base_endpoint_t* bml_endpoint)
{
    int num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
    int rdma_count = 0;

    for (int r = 0; r < num_btls && r < mca_pml_ob1.max_rdma_per_request; ++r) {
        mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
        /* accepted unconditionally when every RDMA BTL may be used */
        bool usable = mca_pml_ob1.use_all_rdma;

        /* otherwise accept only if an eager BTL shares this BTL's endpoint;
         * the scan stops at the first match */
        for (int e = 0; !usable && e < num_eager_btls; ++e) {
            mca_bml_base_btl_t *eager_btl =
                mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, e);
            usable = (eager_btl->btl_endpoint == bml_btl->btl_endpoint);
        }

        if (usable) {
            ++rdma_count;
        }
    }

    return rdma_count;
}
int mca_pml_ob1_progress(void) { int i, queue_length = opal_list_get_size(&mca_pml_ob1.send_pending); int j, completed_requests = 0; bool send_succedded; #if OPAL_CUDA_SUPPORT mca_pml_ob1_process_pending_cuda_async_copies(); #endif /* OPAL_CUDA_SUPPORT */ if( OPAL_LIKELY(0 == queue_length) ) return 0; for( i = 0; i < queue_length; i++ ) { mca_pml_ob1_send_pending_t pending_type = MCA_PML_OB1_SEND_PENDING_NONE; mca_pml_ob1_send_request_t* sendreq; mca_bml_base_endpoint_t* endpoint; sendreq = get_request_from_send_pending(&pending_type); if(OPAL_UNLIKELY(NULL == sendreq)) break; switch(pending_type) { case MCA_PML_OB1_SEND_PENDING_NONE: assert(0); return 0; case MCA_PML_OB1_SEND_PENDING_SCHEDULE: if( mca_pml_ob1_send_request_schedule_exclusive(sendreq) == OMPI_ERR_OUT_OF_RESOURCE ) { return 0; } completed_requests++; break; case MCA_PML_OB1_SEND_PENDING_START: MCA_PML_OB1_SEND_REQUEST_RESET(sendreq); endpoint = sendreq->req_endpoint; send_succedded = false; for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) { mca_bml_base_btl_t* bml_btl; int rc; /* select a btl */ bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); rc = mca_pml_ob1_send_request_start_btl(sendreq, bml_btl); if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { send_succedded = true; completed_requests++; break; } } if( false == send_succedded ) { add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_START, true); } } } return completed_requests; }
/**
 * Try to get a small message out on the wire immediately through the
 * send-immediate entry point of the next eager BTL.
 *
 * @param buf       user buffer
 * @param count     number of datatype elements in buf
 * @param datatype  element datatype
 * @param dst       destination rank (not used inside this function)
 * @param tag       message tag placed in the match header
 * @param seqn      sequence number placed in the match header
 * @param dst_proc  destination process; supplies the convertor template and
 *                  is passed to the header byte-order conversion
 * @param endpoint  BML endpoint whose btl_eager array supplies the BTL
 * @param comm      communicator; supplies context id and local rank
 *
 * @return the packed payload size in bytes (as int) on success;
 *         OMPI_ERR_NOT_AVAILABLE when the chosen BTL has no btl_sendi or
 *         when datatype size * count exceeds 256 bytes; otherwise the
 *         status returned by mca_bml_base_sendi().
 */
static inline int mca_pml_ob1_send_inline (const void *buf, size_t count,
                                           ompi_datatype_t * datatype, int dst,
                                           int tag, int16_t seqn,
                                           ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint,
                                           ompi_communicator_t * comm)
{
    mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
    const int has_payload = (count > 0);
    mca_pml_ob1_match_hdr_t hdr;
    opal_convertor_t convertor;
    size_t nbytes;
    int rc;

    if (NULL == eager_btl->btl->btl_sendi) {
        return OMPI_ERR_NOT_AVAILABLE;
    }

    ompi_datatype_type_size (datatype, &nbytes);
    if ((nbytes * count) > 256) {
        /* arbitrary cut-off for what counts as "small" */
        return OMPI_ERR_NOT_AVAILABLE;
    }

    if (!has_payload) {
        nbytes = 0;
    } else {
        /* construct the object first so opal_convertor_cleanup() below is safe */
        OBJ_CONSTRUCT(&convertor, opal_convertor_t);

        /* clone the peer's convertor and prepare it to pack `count`
         * elements of `datatype` starting at `buf` */
        opal_convertor_copy_and_prepare_for_send (dst_proc->super.proc_convertor,
                                                  (const struct opal_datatype_t *) datatype,
                                                  count, buf, 0, &convertor);
        opal_convertor_get_packed_size (&convertor, &nbytes);
    }

    mca_pml_ob1_match_hdr_prepare (&hdr, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
                                   comm->c_contextid, comm->c_my_rank,
                                   tag, seqn);
    ob1_hdr_hton(&hdr, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc);

    /* one immediate-send attempt; no descriptor is requested back (NULL) */
    rc = mca_bml_base_sendi (eager_btl, &convertor, &hdr, OMPI_PML_OB1_MATCH_HDR_LEN,
                             nbytes, MCA_BTL_NO_ORDER,
                             MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
                             MCA_PML_OB1_HDR_TYPE_MATCH, NULL);

    /* the convertor only exists when there was something to pack */
    if (has_payload) {
        opal_convertor_cleanup (&convertor);
    }

    return OPAL_UNLIKELY(OMPI_SUCCESS != rc) ? rc : (int) nbytes;
}
/**
 * Fill rdma_btls with the RDMA BTLs to use for a pipelined transfer and
 * have the per-BTL lengths computed from their weights.
 *
 * At most min(number of entries in btl_rdma, mca_pml_bfo.max_rdma_per_request)
 * slots are filled, each from mca_bml_base_btl_array_get_next(). A slot's
 * btl_reg is NULL when the BTL has a btl_mpool and &pml_bfo_dummy_reg
 * otherwise.
 *
 * @param bml_endpoint  endpoint whose btl_rdma array is consumed
 * @param size          total length handed to mca_pml_bfo_calc_weighted_length()
 * @param rdma_btls     output array, written from index 0 upwards
 * @return number of slots filled
 */
size_t mca_pml_bfo_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
                                       size_t size,
                                       mca_pml_bfo_com_btl_t* rdma_btls )
{
    int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
    double weight_sum = 0;
    int used = 0;

    while (used < num_btls && used < mca_pml_bfo.max_rdma_per_request) {
        mca_pml_bfo_com_btl_t *slot = &rdma_btls[used];

        slot->bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);

        /* only BTLs without an mpool get the dummy registration */
        if (NULL == slot->bml_btl->btl->btl_mpool) {
            slot->btl_reg = &pml_bfo_dummy_reg;
        } else {
            slot->btl_reg = NULL;
        }

        weight_sum += slot->bml_btl->btl_weight;
        ++used;
    }

    mca_pml_bfo_calc_weighted_length(rdma_btls, used, size, weight_sum);

    return used;
}
/**
 * Fill rdma_btls with the RDMA BTLs that pass the pipeline filter and have
 * the per-BTL lengths computed from their weights.
 *
 * At most min(number of entries in btl_rdma, mca_pml_ob1.max_rdma_per_request)
 * entries are fetched with mca_bml_base_btl_array_get_next(). An entry is
 * kept when mca_pml_ob1.use_all_rdma is set, or when its btl_endpoint is the
 * same pointer as the btl_endpoint of some entry in btl_eager. Kept entries
 * are stored contiguously from index 0 with btl_reg set to NULL.
 *
 * @param bml_endpoint  endpoint whose btl_rdma / btl_eager arrays are examined
 * @param size          total length handed to mca_pml_ob1_calc_weighted_length()
 * @param rdma_btls     output array
 * @return number of entries stored in rdma_btls
 */
size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
                                       size_t size,
                                       mca_pml_ob1_com_btl_t* rdma_btls )
{
    int num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
    double weight_total = 0;
    int rdma_count = 0;

    for (int r = 0; r < num_btls && r < mca_pml_ob1.max_rdma_per_request; r++) {
        mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
        /* accepted unconditionally when every RDMA BTL may be used */
        bool usable = mca_pml_ob1.use_all_rdma;

        /* otherwise accept only if an eager BTL shares this BTL's endpoint;
         * the scan stops at the first match */
        for (int e = 0; !usable && e < num_eager_btls; ++e) {
            mca_bml_base_btl_t *eager_btl =
                mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, e);
            usable = (eager_btl->btl_endpoint == bml_btl->btl_endpoint);
        }

        if (usable) {
            mca_pml_ob1_com_btl_t *slot = &rdma_btls[rdma_count++];

            slot->bml_btl = bml_btl;
            slot->btl_reg = NULL;
            weight_total += bml_btl->btl_weight;
        }
    }

    mca_pml_ob1_calc_weighted_length (rdma_btls, rdma_count, size, weight_total);

    return rdma_count;
}
/**
 * Try to get a small message out on the wire immediately through the
 * send-immediate entry point of the next eager BTL.
 *
 * @param buf       user buffer
 * @param count     number of datatype elements in buf
 * @param datatype  element datatype
 * @param dst       destination rank (not used inside this function)
 * @param tag       message tag stored in the match header
 * @param seqn      sequence number stored in the match header
 * @param dst_proc  destination process; supplies the convertor template and
 *                  is passed to the header byte-order conversion
 * @param endpoint  BML endpoint whose btl_eager array supplies the BTL
 * @param comm      communicator; supplies context id and local rank
 *
 * @return the packed payload size in bytes (as int) on success;
 *         OMPI_ERR_NOT_AVAILABLE when the chosen BTL has no btl_sendi or
 *         when datatype extent * count exceeds 256; otherwise the status
 *         returned by mca_bml_base_sendi(). On that failure path a
 *         descriptor handed back by the send call is released first.
 */
static inline int mca_pml_ob1_send_inline (void *buf, size_t count,
                                           ompi_datatype_t * datatype, int dst,
                                           int tag, int16_t seqn,
                                           ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint,
                                           ompi_communicator_t * comm)
{
    mca_btl_base_descriptor_t *des = NULL;
    mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
    const int has_payload = (count > 0);
    mca_pml_ob1_match_hdr_t hdr;
    OPAL_PTRDIFF_TYPE lb, extent;
    opal_convertor_t convertor;
    size_t nbytes = 0;
    int rc;

    ompi_datatype_get_extent (datatype, &lb, &extent);

    /* bail out when there is no send-immediate hook or the message is not small */
    if (OPAL_UNLIKELY(!eager_btl->btl->btl_sendi || (extent * count) > 256)) {
        return OMPI_ERR_NOT_AVAILABLE;
    }

    if (has_payload) {
        /* construct the object first so opal_convertor_cleanup() below is safe */
        OBJ_CONSTRUCT(&convertor, opal_convertor_t);

        /* clone the peer's convertor and prepare it to pack `count`
         * elements of `datatype` starting at `buf` */
        opal_convertor_copy_and_prepare_for_send (dst_proc->proc_convertor,
                                                  (const struct opal_datatype_t *) datatype,
                                                  count, buf, 0, &convertor);
        opal_convertor_get_packed_size (&convertor, &nbytes);
    }

    /* fill the match header field by field, then convert it for the peer */
    hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
    hdr.hdr_common.hdr_flags = 0;
    hdr.hdr_ctx = comm->c_contextid;
    hdr.hdr_src = comm->c_my_rank;
    hdr.hdr_tag = tag;
    hdr.hdr_seq = seqn;

    ob1_hdr_hton(&hdr, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc);

    /* one immediate-send attempt; `des` may be filled in by the call */
    rc = mca_bml_base_sendi (eager_btl, &convertor, &hdr, OMPI_PML_OB1_MATCH_HDR_LEN,
                             nbytes, MCA_BTL_NO_ORDER,
                             MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
                             MCA_PML_OB1_HDR_TYPE_MATCH, &des);

    /* the convertor only exists when there was something to pack */
    if (has_payload) {
        opal_convertor_cleanup (&convertor);
    }

    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        /* give back any descriptor the failed attempt left us with */
        if (NULL != des) {
            mca_bml_base_free (eager_btl, des);
        }
        return rc;
    }

    return (int) nbytes;
}