/**
 * Build the per-buffer descriptor table for the ML payload memory.
 *
 * One descriptor is created for every (bank, buffer) pair and stored at the
 * flat index bank * num_buffers_per_bank + buffer.  Each descriptor gets its
 * bank/buffer indices, a zero-filled array of request pointers, the address
 * of its data region inside base_addr, and cleared reduce flags.
 *
 * @param desc                  [out] receives the allocated table; set to
 *                              NULL when the function fails
 * @param base_addr             start of the ML buffer memory
 * @param num_banks             number of banks
 * @param num_buffers_per_bank  number of buffers in each bank
 * @param size_buffer           distance in bytes between consecutive buffers
 * @param header_size           bytes skipped at the start of each buffer
 * @param group_size            not used by this function
 * @param pow_k                 k-nomial power; 0 is treated as 1
 *
 * @return OMPI_SUCCESS on success.  OMPI_ERROR if an allocation fails; in
 *         that case everything allocated by this call has been released.
 */
static int init_ml_buf_desc(mca_bcol_ptpcoll_ml_buffer_desc_t **desc, void *base_addr,
                            uint32_t num_banks, uint32_t num_buffers_per_bank,
                            uint32_t size_buffer, uint32_t header_size,
                            int group_size, int pow_k)
{
    uint32_t i, j, ci;
    mca_bcol_ptpcoll_ml_buffer_desc_t *tmp_desc = NULL;
    int k_nomial_radix = mca_bcol_ptpcoll_component.k_nomial_radix;
    int pow_k_val = (0 == pow_k) ? 1 : pow_k;
    /* *2 is for gather session +1 for extra peer */
    int num_to_alloc =
        ((k_nomial_radix - 1) * pow_k_val * 2 + 1 > mca_bcol_ptpcoll_component.narray_radix) ?
        (k_nomial_radix - 1) * pow_k_val * 2 + 1 :
        mca_bcol_ptpcoll_component.narray_radix * 2;

    (void) group_size;

    /* Widen before multiplying so the element count cannot wrap in 32 bits. */
    *desc = (mca_bcol_ptpcoll_ml_buffer_desc_t *)
        calloc((size_t) num_banks * num_buffers_per_bank,
               sizeof(mca_bcol_ptpcoll_ml_buffer_desc_t));
    if (NULL == *desc) {
        PTPCOLL_ERROR(("Failed to allocate memory"));
        return OMPI_ERROR;
    }

    tmp_desc = *desc;

    for (i = 0; i < num_banks; i++) {
        for (j = 0; j < num_buffers_per_bank; j++) {
            ci = i * num_buffers_per_bank + j;
            tmp_desc[ci].bank_index = i;
            tmp_desc[ci].buffer_index = j;

            tmp_desc[ci].requests = (ompi_request_t **)
                calloc(num_to_alloc, sizeof(ompi_request_t *));
            if (NULL == tmp_desc[ci].requests) {
                PTPCOLL_ERROR(("Failed to allocate memory for requests"));
                /* Entries 0 .. ci-1 were filled in flat-index order; undo
                 * them so the caller is not handed a half-built table. */
                while (ci > 0) {
                    --ci;
                    free(tmp_desc[ci].requests);
                }
                free(tmp_desc);
                *desc = NULL;
                return OMPI_ERROR;
            }

            /*
             * ptpcoll doesn't have any header, but other bcols may have one,
             * so it has to be taken into account.  The offset is computed in
             * size_t so that large buffer pools do not wrap at 4 GiB.
             */
            tmp_desc[ci].data_addr = (void *)
                ((unsigned char *) base_addr + (size_t) ci * size_buffer + header_size);
            PTPCOLL_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr));

            /* init reduce implementation flags */
            tmp_desc[ci].reduce_init_called = false;
            tmp_desc[ci].reduction_status = 0;
        }
    }

    return OMPI_SUCCESS;
}
/* We have the same progress func for both cases (R-D and K-Nominal) */
/**
 * Progress function for a barrier started on an "extra" rank.
 *
 * Tests the two requests stored in the collective request object that
 * input_args->bcol_opaque_data points to.
 *
 * @param input_args  carries the collective request in bcol_opaque_data
 * @param const_args  carries the ptpcoll module in bcol_module
 *
 * @return BCOL_FN_STARTED while the requests are still pending,
 *         BCOL_FN_COMPLETE once they have completed, or the error code
 *         reported by the completion test.
 */
static int bcol_ptpcoll_barrier_extra_node_progress(
                bcol_function_args_t *input_args,
                struct coll_ml_function_t *const_args)
{
    /* local variable */
    ompi_request_t **requests;
    int rc, completed, num_reqs = 2;
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;

    requests = collreq->requests;

    /* test for completion */
    completed = mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    /* The extra-node init functions take the request object from
     * collreqs_free and only give it back when they finish immediately.
     * When completion is reached here instead, it must be returned here,
     * otherwise the free list is drained one item per barrier. */
    OMPI_FREE_LIST_RETURN_MT(&ptpcoll_module->collreqs_free, (ompi_free_list_item_t *) collreq);

    return BCOL_FN_COMPLETE;
}
/**
 * Register the barrier init/progress function pair for this module.
 *
 * The pair is chosen from the component's barrier_alg setting:
 *   1 - recursive doubling (a dedicated pair is used when this rank's
 *       pow_2type is PTPCOLL_EXTRA),
 *   2 - recursive k-nomial (a dedicated pair is used when this rank is an
 *       EXTRA_NODE of the k-nomial exchange tree with extra sources).
 * Any other value only logs an error; nothing is registered.
 *
 * @param super       base module, also viewed as the ptpcoll module
 * @param bcoll_type  collective type stored in the communicator attributes
 *
 * @return always OMPI_SUCCESS
 */
static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type)
{
    mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
    mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
    mca_bcol_ptpcoll_module_t *module = (mca_bcol_ptpcoll_module_t *) super;
    netpatterns_k_exchange_node_t *xnode;

    /* Invocation attributes: message range 1, every datatype and op. */
    inv_attribs.bcol_msg_min = 0;
    inv_attribs.bcol_msg_max = 20000; /* range 1 */
    inv_attribs.datatype_bitmap = 0xffffffff;
    inv_attribs.op_types_bitmap = 0xffffffff;

    /* Communicator attributes. */
    comm_attribs.bcoll_type = bcoll_type;
    comm_attribs.comm_size_min = 0;
    comm_attribs.comm_size_max = 1024 * 1024;
    comm_attribs.data_src = DATA_SRC_KNOWN;
    comm_attribs.waiting_semantics = NON_BLOCKING;

    if (1 == mca_bcol_ptpcoll_component.barrier_alg) {
        /* Recursive doubling */
        if (PTPCOLL_EXTRA == module->pow_2type) {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_dbl_extra_new,
                                         bcol_ptpcoll_barrier_extra_node_progress);
        } else {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_dbl_new,
                                         bcol_ptpcoll_barrier_recurs_dbl_new_progress);
        }
    } else if (2 == mca_bcol_ptpcoll_component.barrier_alg) {
        /* Recursive k-nomial */
        xnode = &module->knomial_exchange_tree;
        if (EXTRA_NODE == xnode->node_type && xnode->n_extra_sources > 0) {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_knomial_extra_new,
                                         bcol_ptpcoll_barrier_extra_node_progress);
        } else {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_knomial_new,
                                         bcol_ptpcoll_barrier_recurs_knomial_new_progress);
        }
    } else {
        PTPCOLL_ERROR(("Wrong barrier_alg flag value."));
    }

    return OMPI_SUCCESS;
}
/**
 * Compute the static K-nomial information of the module.
 *
 * Sets k_nomial_radix (the component radix capped at group_size), pow_k and
 * pow_knum (via pow_k_calc), allocates kn_proxy_extra_index and classifies
 * this rank in pow_ktype:
 *   - PTPCOLL_KN_PROXY:    rank below pow_knum that owns at least one rank at
 *                          or above pow_knum; the owned ranks are stored in
 *                          kn_proxy_extra_index[0 .. kn_proxy_extra_num-1]
 *   - PTPCOLL_KN_IN_GROUP: rank below pow_knum that owns none
 *   - PTPCOLL_KN_EXTRA:    rank at or above pow_knum; the index of its proxy
 *                          is stored in kn_proxy_extra_index[0]
 *
 * @param ptpcoll_module  module to fill in
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR if the index array cannot be allocated
 *         (kn_proxy_extra_index is NULL in that case).
 */
static int load_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
    int i;
    int n_slots;
    mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;

    ptpcoll_module->k_nomial_radix =
        cm->k_nomial_radix > ptpcoll_module->group_size ?
        ptpcoll_module->group_size : cm->k_nomial_radix;

    ptpcoll_module->pow_k = pow_k_calc(ptpcoll_module->k_nomial_radix,
                                       ptpcoll_module->group_size,
                                       &ptpcoll_module->pow_knum);

    /* Always ask for at least one slot: malloc(0) is allowed to return NULL,
     * which the check below would misreport as an allocation failure. */
    n_slots = ptpcoll_module->k_nomial_radix - 1;
    if (n_slots < 1) {
        n_slots = 1;
    }

    ptpcoll_module->kn_proxy_extra_index = (int *) malloc(sizeof(int) * n_slots);
    if (NULL == ptpcoll_module->kn_proxy_extra_index) {
        PTPCOLL_ERROR(("Failed to allocate memory"));
        goto Error;
    }

    /* Setting peer type for K-nomial algorithm */
    if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_knum) {
        if (ptpcoll_module->super.sbgp_partner_module->my_index <
                ptpcoll_module->group_size - ptpcoll_module->pow_knum) {
            for (i = 0;
                 i < (ptpcoll_module->k_nomial_radix - 1) &&
                 ptpcoll_module->super.sbgp_partner_module->my_index *
                 (ptpcoll_module->k_nomial_radix - 1) +
                 i + ptpcoll_module->pow_knum < ptpcoll_module->group_size;
                 i++) {
                ptpcoll_module->pow_ktype = PTPCOLL_KN_PROXY;
                ptpcoll_module->kn_proxy_extra_index[i] =
                    ptpcoll_module->super.sbgp_partner_module->my_index *
                    (ptpcoll_module->k_nomial_radix - 1) +
                    i + ptpcoll_module->pow_knum;
                PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_knum = %d [%d] my extra %d",
                            ptpcoll_module->pow_knum,
                            ptpcoll_module->pow_k,
                            ptpcoll_module->kn_proxy_extra_index[i]));
            }

            ptpcoll_module->kn_proxy_extra_num = i;

            /* With a radix above 2 the outer test can pass while the first
             * candidate extra rank is already outside the group, so the loop
             * above runs zero times.  Such a rank serves no extras; give it
             * a definite type instead of leaving pow_ktype unassigned. */
            if (0 == i) {
                PTPCOLL_VERBOSE(10 ,("My type is in group, pow_knum = %d [%d]",
                            ptpcoll_module->pow_knum,
                            ptpcoll_module->pow_k));
                ptpcoll_module->pow_ktype = PTPCOLL_KN_IN_GROUP;
            }
        } else {
            PTPCOLL_VERBOSE(10 ,("My type is in group, pow_knum = %d [%d]",
                        ptpcoll_module->pow_knum,
                        ptpcoll_module->pow_k));
            ptpcoll_module->pow_ktype = PTPCOLL_KN_IN_GROUP;
        }
    } else {
        ptpcoll_module->pow_ktype = PTPCOLL_KN_EXTRA;
        ptpcoll_module->kn_proxy_extra_index[0] =
            (ptpcoll_module->super.sbgp_partner_module->my_index - ptpcoll_module->pow_knum) /
            (ptpcoll_module->k_nomial_radix - 1);
        PTPCOLL_VERBOSE(10 ,("My type is extra , pow_knum = %d [%d] my proxy %d",
                    ptpcoll_module->pow_knum,
                    ptpcoll_module->pow_k,
                    ptpcoll_module->kn_proxy_extra_index[0]));
    }

    return OMPI_SUCCESS;

Error:
    /* free(NULL) is a no-op, so no guard is needed; clear the field so no
     * stale pointer is left in the module. */
    free(ptpcoll_module->kn_proxy_extra_index);
    ptpcoll_module->kn_proxy_extra_index = NULL;

    return OMPI_ERROR;
}
/* Setup N-array scatter Knomial-gather static information */
/**
 * Fill in the module's N-array/K-nomial tree information.
 *
 * Computes full_narray_tree_size and full_narray_tree_num_leafs (via
 * calc_full_tree_size), allocates narray_knomial_proxy_extra_index and
 * narray_knomial_node, and classifies this rank in narray_type:
 *   - PTPCOLL_PROXY:    rank inside the full tree that owns ranks outside it;
 *                       they are listed in narray_knomial_proxy_extra_index
 *                       and counted in narray_knomial_proxy_num
 *   - PTPCOLL_IN_GROUP: rank inside the full tree that owns none
 *   - PTPCOLL_EXTRA:    rank outside the full tree; the index of its proxy is
 *                       stored in narray_knomial_proxy_extra_index[0]
 * For ranks inside the full tree, every entry of narray_knomial_node is
 * initialised with netpatterns_setup_narray_knomial_tree.
 *
 * @param ptpcoll_module  module to fill in
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR on failure.  On failure both arrays
 *         are freed and their module fields are reset to NULL.
 */
static int load_narray_knomial_tree (mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
    int rc, i, peer;
    mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;

    /* The error path inspects both pointers.  Start from NULL so that it
     * only ever frees memory allocated by this call, even when the first
     * allocation is the one that fails. */
    ptpcoll_module->narray_knomial_node = NULL;
    ptpcoll_module->narray_knomial_proxy_extra_index = NULL;

    ptpcoll_module->full_narray_tree_size = calc_full_tree_size(
            cm->narray_knomial_radix,
            ptpcoll_module->group_size,
            &ptpcoll_module->full_narray_tree_num_leafs);

    ptpcoll_module->narray_knomial_proxy_extra_index = (int *)
        malloc(sizeof(int) * (cm->narray_knomial_radix));
    if (NULL == ptpcoll_module->narray_knomial_proxy_extra_index) {
        PTPCOLL_ERROR(("Failed to allocate memory"));
        goto Error;
    }

    ptpcoll_module->narray_knomial_node = calloc(
            ptpcoll_module->full_narray_tree_size,
            sizeof(netpatterns_narray_knomial_tree_node_t));
    if (NULL == ptpcoll_module->narray_knomial_node) {
        PTPCOLL_ERROR(("Failed to allocate memory"));
        goto Error;
    }

    PTPCOLL_VERBOSE(10 ,("My type is proxy, full tree size = %d [%d]",
                ptpcoll_module->full_narray_tree_size,
                cm->narray_knomial_radix
                ));

    if (ptpcoll_module->super.sbgp_partner_module->my_index <
            ptpcoll_module->full_narray_tree_size) {
        if (ptpcoll_module->super.sbgp_partner_module->my_index <
                ptpcoll_module->group_size - ptpcoll_module->full_narray_tree_size) {
            ptpcoll_module->narray_type = PTPCOLL_PROXY;
            /* Collect the out-of-tree ranks this rank is responsible for. */
            for (i = 0; i < cm->narray_knomial_radix; i++) {
                peer = ptpcoll_module->super.sbgp_partner_module->my_index *
                    cm->narray_knomial_radix + i +
                    ptpcoll_module->full_narray_tree_size;
                if (peer >= ptpcoll_module->group_size) {
                    break;
                }
                ptpcoll_module->narray_knomial_proxy_extra_index[i] = peer;
            }
            ptpcoll_module->narray_knomial_proxy_num = i;
        } else {
            ptpcoll_module->narray_type = PTPCOLL_IN_GROUP;
        }

        /* Setting node info */
        for (i = 0; i < ptpcoll_module->full_narray_tree_size; i++) {
            rc = netpatterns_setup_narray_knomial_tree(
                    cm->narray_knomial_radix,
                    i,
                    ptpcoll_module->full_narray_tree_size,
                    &ptpcoll_module->narray_knomial_node[i]);
            if (OMPI_SUCCESS != rc) {
                goto Error;
            }
        }
    } else {
        ptpcoll_module->narray_type = PTPCOLL_EXTRA;
        ptpcoll_module->narray_knomial_proxy_extra_index[0] =
            (ptpcoll_module->super.sbgp_partner_module->my_index -
             ptpcoll_module->full_narray_tree_size) /
            cm->narray_knomial_radix;
    }

    return OMPI_SUCCESS;

Error:
    /* Reset the fields after freeing: the module outlives this call, and a
     * dangling pointer left here could be freed a second time later on. */
    free(ptpcoll_module->narray_knomial_node);
    ptpcoll_module->narray_knomial_node = NULL;

    free(ptpcoll_module->narray_knomial_proxy_extra_index);
    ptpcoll_module->narray_knomial_proxy_extra_index = NULL;

    return OMPI_ERROR;
}
/**
 * Start a recursive-doubling barrier on a rank that does not take part in
 * the exchange itself.
 *
 * The rank takes a collective request from the module's free list, stores it
 * in input_args->bcol_opaque_data, then posts a zero-byte send to its proxy
 * (group_list[proxy_extra_index]) followed by a zero-byte receive from it.
 *
 * @param input_args  supplies sequence_num; receives the collective request
 * @param const_args  supplies the ptpcoll module in bcol_module
 *
 * @return BCOL_FN_COMPLETE if both requests completed right away (the
 *         request object is returned to the free list), BCOL_FN_STARTED if
 *         they are still pending, OMPI_ERR_OUT_OF_RESOURCE if no request
 *         object could be obtained, or the error code of a failed call.
 */
static int bcol_ptpcoll_barrier_recurs_dbl_extra_new(
                bcol_function_args_t *input_args,
                struct coll_ml_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_free_list_item_t *fl_item;
    ompi_request_t **reqs;
    uint64_t seq;
    int n_pending = 2;
    int proxy_comm_rank, tag, rc, done;

    OMPI_FREE_LIST_WAIT_MT(&ptp_module->collreqs_free, fl_item);
    if (OPAL_UNLIKELY(NULL == fl_item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) fl_item;
    input_args->bcol_opaque_data = (void *) collreq;
    reqs = collreq->requests;

    /* Derive the tag from the sequence number, keep it inside the mask the
     * module holds for the pml, and negate it so that it is marked as a
     * collective tag and cannot collide with user-level tags. */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);
    tag = -tag;

    /* This rank only "registers": tell the proxy we have arrived ... */
    proxy_comm_rank =
        ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];
    rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, proxy_comm_rank, tag,
                            MCA_PML_BASE_SEND_STANDARD, comm, &reqs[0]));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Send failed."));
        return rc;
    }

    /* ... and wait for its signal that everybody else is done. */
    rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, proxy_comm_rank, tag,
                            comm, &reqs[1]));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("IRecv failed."));
        return rc;
    }

    done = mca_bcol_ptpcoll_test_all_for_match(&n_pending, reqs, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (done) {
        OMPI_FREE_LIST_RETURN_MT(&ptp_module->collreqs_free, (ompi_free_list_item_t *) collreq);
        return BCOL_FN_COMPLETE;
    }

    return BCOL_FN_STARTED;
}
/**
 * Progress function of the recursive-doubling barrier for ranks that take
 * part in the exchange.
 *
 * Resumes from the state saved in the collective request
 * (input_args->bcol_opaque_data): first the requests that were pending at
 * the last return are tested; then the exchange loop continues from
 * collreq->exchange; finally, if collreq->need_toserv_extra is set, a
 * zero-byte message is sent to collreq->extra_partner_rank.
 *
 * @param input_args  carries the collective request in bcol_opaque_data
 * @param const_args  carries the ptpcoll module in bcol_module
 *
 * @return BCOL_FN_STARTED if some request is still pending (state is saved
 *         in the collective request), BCOL_FN_COMPLETE when the barrier is
 *         finished (the request object is returned to the free list), or the
 *         error code of a failed call.
 */
static int bcol_ptpcoll_barrier_recurs_dbl_new_progress(
                bcol_function_args_t *input_args,
                struct coll_ml_function_t *const_args)
{
    /* local variable */
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
    int rc, exchange, pair_comm_rank, tag,
        pair_rank, delta, num_reqs, completed,
        my_rank = ptp_module->super.sbgp_partner_module->my_index,
        n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2;
    ompi_request_t **requests;
    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;

    num_reqs = collreq->num_reqs;
    requests = collreq->requests;

    /* test for completion of whatever was pending at the last return */
    completed = mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    assert(PTPCOLL_EXTRA != ptp_module->pow_2type);

    /* Continue loop over exchange send/recv pairs */
    num_reqs = 0;
    tag = collreq->tag;

    exchange = collreq->exchange;
    assert(exchange >= 0);

    /* the partner distance doubles with every exchange step */
    delta = 1 << exchange;

    for (; exchange < n_exchange; ++exchange) {
        /* rank of exchange partner within the group */
        pair_rank = my_rank ^ delta;

        /* rank within the communicator */
        pair_comm_rank =
            ptp_module->super.sbgp_partner_module->group_list[pair_rank];

        /* send to partner - we will wait for completion, as send
         * completion is at the MPI level, and will not
         * incur network level completion costs
         */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, pair_comm_rank, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm,
                                &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        ++num_reqs;

        /* receive from partner */
        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, pair_comm_rank, tag,
                                comm, &(requests[1])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        ++num_reqs;

        PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
                             exchange, pair_rank, pair_comm_rank));

        /* test for completion */
        completed = mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            /* remember where to resume on the next progress call */
            collreq->num_reqs = num_reqs;
            collreq->exchange = exchange + 1;
            assert(collreq->exchange >= 0);
            return BCOL_FN_STARTED;
        }

        delta <<= 1; /* delta *= 2 */
    }

    /* if non power of 2, may need to send message to "extra" proc */
    if (collreq->need_toserv_extra) {
        /* send - let the extra rank know that we are done */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, collreq->extra_partner_rank, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm,
                                &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for isend failed."));
            return rc;
        }

        if (!completed) {
            /* next call only has to test this one send: skip the loop and
             * do not send to the extra rank a second time */
            collreq->num_reqs = 1;
            collreq->need_toserv_extra = 0;
            collreq->exchange = n_exchange;
            return BCOL_FN_STARTED;
        }
    }

    /* The init function took the request object from collreqs_free and only
     * gives it back when it finishes without needing progress.  When the
     * barrier finishes here instead, the object has to be returned here. */
    OMPI_FREE_LIST_RETURN_MT(&ptp_module->collreqs_free, (ompi_free_list_item_t *) collreq);

    return BCOL_FN_COMPLETE;
}
/**
 * Start a recursive-doubling barrier on a rank that takes part in the
 * exchange (pow_2type is not PTPCOLL_EXTRA).
 *
 * Steps, each using zero-byte messages:
 *   1. a PTPCOLL_PROXY rank first receives from its extra rank;
 *   2. for every exchange step, send to and receive from the partner whose
 *      group rank is my_index XOR 2^step;
 *   3. a PTPCOLL_PROXY rank finally sends to its extra rank.
 * Whenever a step has not completed, the tag, the number of pending requests
 * and the next exchange step are saved in the collective request.
 *
 * @param input_args  supplies sequence_num; receives the collective request
 *                    in bcol_opaque_data
 * @param const_args  supplies the ptpcoll module in bcol_module
 *
 * @return BCOL_FN_COMPLETE if everything completed right away (the request
 *         object is returned to the free list), BCOL_FN_STARTED if requests
 *         are still pending, OMPI_ERR_OUT_OF_RESOURCE if no request object
 *         could be obtained, or the error code of a failed call.
 */
static int bcol_ptpcoll_barrier_recurs_dbl_new(
                bcol_function_args_t *input_args,
                struct coll_ml_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
    const int self = ptp_module->super.sbgp_partner_module->my_index;
    const int n_rounds = ptp_module->super.sbgp_partner_module->n_levels_pow2;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_free_list_item_t *fl_item;
    ompi_request_t **reqs;
    uint64_t seq;
    int extra_comm_rank = 0;
    int n_pending = 0;
    int round = 0;
    int rc, tag, done, peer, peer_comm_rank;

    OMPI_FREE_LIST_WAIT_MT(&ptp_module->collreqs_free, fl_item);
    if (OPAL_UNLIKELY(NULL == fl_item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) fl_item;
    input_args->bcol_opaque_data = (void *) collreq;

    assert(PTPCOLL_EXTRA != ptp_module->pow_2type);

    reqs = collreq->requests;

    /* Tag: derived from the sequence number, masked with the module's tag
     * mask, then negated to mark it as a collective tag so it cannot
     * conflict with user-level tags. */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);
    tag = -tag;

    if (PTPCOLL_PROXY != ptp_module->pow_2type) {
        collreq->need_toserv_extra = 0;
    } else {
        /* A proxy first waits for the signal of its extra rank. */
        extra_comm_rank =
            ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];

        collreq->need_toserv_extra = 1;
        collreq->extra_partner_rank = extra_comm_rank;

        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, extra_comm_rank, tag,
                                comm, &reqs[0]));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        done = mca_bcol_ptpcoll_test_for_match(&reqs[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for irecv failed."));
            return rc;
        }

        if (!done) {
            /* progress resumes with the first exchange step */
            collreq->tag = tag;
            collreq->num_reqs = 1;
            collreq->exchange = 0;
            return BCOL_FN_STARTED;
        }
    }

    /* Pairwise exchange: the partner distance is 2^round. */
    while (round < n_rounds) {
        /* partner rank inside the group, then inside the communicator */
        peer = self ^ (1 << round);
        peer_comm_rank = ptp_module->super.sbgp_partner_module->group_list[peer];

        /* The send is posted first; its completion is at the MPI level and
         * does not incur network level completion costs. */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer_comm_rank, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm, &reqs[0]));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer_comm_rank, tag,
                                comm, &reqs[1]));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        /* one send and one receive were added for this step */
        n_pending += 2;

        PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
                             round, peer, peer_comm_rank));

        done = mca_bcol_ptpcoll_test_all_for_match(&n_pending, reqs, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!done) {
            collreq->tag = tag;
            collreq->num_reqs = n_pending;
            collreq->exchange = round + 1;
            assert(collreq->exchange >= 0);
            return BCOL_FN_STARTED;
        }

        ++round;
    }

    if (PTPCOLL_PROXY == ptp_module->pow_2type) {
        /* Tell the extra rank that the exchange is over. */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, extra_comm_rank, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm, &reqs[0]));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        done = mca_bcol_ptpcoll_test_for_match(&reqs[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for isend failed."));
            return rc;
        }

        if (!done) {
            /* progress only has to test this send: no exchange steps and no
             * second message to the extra rank remain */
            collreq->tag = tag;
            collreq->num_reqs = 1;
            collreq->need_toserv_extra = 0;
            collreq->exchange = n_rounds;
            return BCOL_FN_STARTED;
        }
    }

    OMPI_FREE_LIST_RETURN_MT(&ptp_module->collreqs_free, (ompi_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}
/**
 * Start a recursive k-nomial barrier on a rank that does not take part in
 * the exchange itself.
 *
 * The rank takes a collective request from the module's free list, stores it
 * in input_args->bcol_opaque_data, then posts a zero-byte send to, followed
 * by a zero-byte receive from, the rank listed first in the exchange node's
 * rank_extra_sources_array.
 *
 * @param input_args  supplies sequence_num; receives the collective request
 * @param const_args  supplies the ptpcoll module in bcol_module
 *
 * @return BCOL_FN_COMPLETE if both requests completed right away (the
 *         request object is returned to the free list), BCOL_FN_STARTED if
 *         they are still pending, OMPI_ERR_OUT_OF_RESOURCE if no request
 *         object could be obtained, or the error code of a failed call.
 */
static int bcol_ptpcoll_barrier_recurs_knomial_extra_new(
                bcol_function_args_t *input_args,
                struct coll_ml_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    netpatterns_k_exchange_node_t *xnode = &ptpcoll_module->knomial_exchange_tree;
    ompi_communicator_t *comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
    int *extras = xnode->rank_extra_sources_array;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_free_list_item_t *fl_item;
    ompi_request_t **reqs;
    uint64_t seq;
    int n_pending = 2;
    int peer_comm_rank, tag, rc, done;

    OMPI_FREE_LIST_WAIT_MT(&ptpcoll_module->collreqs_free, fl_item);
    if (OPAL_UNLIKELY(NULL == fl_item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) fl_item;
    input_args->bcol_opaque_data = (void *) collreq;
    reqs = collreq->requests;

    /* Tag: derived from the sequence number, masked with the module's tag
     * mask, then negated to mark it as a collective tag so it cannot
     * conflict with user-level tags. */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
    tag = -tag;

    peer_comm_rank =
        ptpcoll_module->super.sbgp_partner_module->group_list[extras[0]];

    /* Announce arrival ... */
    rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer_comm_rank, tag,
                            MCA_PML_BASE_SEND_STANDARD, comm, &reqs[0]));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("ISend failed."));
        return rc;
    }

    /* ... and post the receive for the answer. */
    rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer_comm_rank, tag,
                            comm, &reqs[1]));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("IRecv failed."));
        return rc;
    }

    done = mca_bcol_ptpcoll_test_all_for_match(&n_pending, reqs, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (done) {
        OMPI_FREE_LIST_RETURN_MT(&ptpcoll_module->collreqs_free, (ompi_free_list_item_t *) collreq);
        return BCOL_FN_COMPLETE;
    }

    return BCOL_FN_STARTED;
}
/**
 * Start a recursive k-nomial barrier on a rank that takes part in the
 * exchange.
 *
 * Steps, each using zero-byte messages:
 *   1. if the exchange node has extra sources, receive from each of them;
 *   2. for every exchange step, send to and receive from each of the
 *      tree_order - 1 partners listed in rank_exchanges[step];
 *   3. if the exchange node has extra sources, send to each of them.
 * Whenever a step has not completed, the tag, the number of pending requests
 * and the next exchange step are saved in the collective request.
 *
 * @param input_args  supplies sequence_num; receives the collective request
 *                    in bcol_opaque_data
 * @param const_args  supplies the ptpcoll module in bcol_module
 *
 * @return BCOL_FN_COMPLETE if everything completed right away (the request
 *         object is returned to the free list), BCOL_FN_STARTED if requests
 *         are still pending, OMPI_ERR_OUT_OF_RESOURCE if no request object
 *         could be obtained, or the error code of a failed call.
 */
static int bcol_ptpcoll_barrier_recurs_knomial_new(
                bcol_function_args_t *input_args,
                struct coll_ml_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    netpatterns_k_exchange_node_t *xnode = &ptpcoll_module->knomial_exchange_tree;
    ompi_communicator_t *comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
    int **partners = xnode->rank_exchanges;
    int *extras = NULL;
    const int n_partners = xnode->tree_order - 1;
    const int n_extras = xnode->n_extra_sources;
    const int n_rounds = xnode->n_exchanges;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_free_list_item_t *fl_item;
    ompi_request_t **reqs;
    uint64_t seq;
    int rc, tag, done, n_pending, round, k, peer_comm_rank;

    OMPI_FREE_LIST_WAIT_MT(&ptpcoll_module->collreqs_free, fl_item);
    if (OPAL_UNLIKELY(NULL == fl_item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) fl_item;
    input_args->bcol_opaque_data = (void *) collreq;
    reqs = collreq->requests;

    /* Tag: derived from the sequence number, masked with the module's tag
     * mask, then negated to mark it as a collective tag so it cannot
     * conflict with user-level tags. */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
    tag = -tag;

    if (n_extras <= 0) {
        collreq->need_toserv_extra = 0;
    } else {
        /* EXCHANGE_NODE case: wait for the signal of every extra rank
         * before entering the exchange. */
        collreq->need_toserv_extra = 1;
        extras = xnode->rank_extra_sources_array;

        for (k = 0; k < n_extras; ++k) {
            peer_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[extras[k]];
            rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer_comm_rank, tag,
                                    comm, &reqs[k]));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }
        }

        n_pending = n_extras;

        done = mca_bcol_ptpcoll_test_all_for_match(&n_pending, reqs, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!done) {
            /* progress resumes with the first exchange step */
            collreq->tag = tag;
            collreq->num_reqs = n_pending;
            collreq->exchange = 0;
            return BCOL_FN_STARTED;
        }
    }

    /* Exchange steps: partner k uses request slot 2k for its receive and
     * slot 2k+1 for its send. */
    round = 0;
    while (round < n_rounds) {
        for (k = 0; k < n_partners; ++k) {
            /* communicator rank of the k-th partner of this step */
            peer_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[partners[round][k]];

            assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1));

            /* The send is posted first; its completion is at the MPI level
             * and does not incur network level completion costs. */
            rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer_comm_rank, tag,
                                    MCA_PML_BASE_SEND_STANDARD, comm,
                                    &reqs[k * 2 + 1]));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", round, k,
                                 peer_comm_rank, partners[round][k]));

            rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer_comm_rank, tag,
                                    comm, &reqs[k * 2]));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", round, k,
                                 peer_comm_rank, partners[round][k]));
        }

        n_pending = 2 * n_partners;

        done = mca_bcol_ptpcoll_test_all_for_match(&n_pending, reqs, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!done) {
            collreq->tag = tag;
            collreq->num_reqs = n_pending;
            collreq->exchange = round + 1;
            return BCOL_FN_STARTED;
        }

        ++round;
    }

    if (n_extras > 0) {
        /* EXCHANGE_NODE case: tell every extra rank that we are done. */
        for (k = 0; k < n_extras; ++k) {
            peer_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[extras[k]];
            rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer_comm_rank, tag,
                                    MCA_PML_BASE_SEND_STANDARD, comm,
                                    &reqs[k]));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
        }

        n_pending = n_extras;

        done = mca_bcol_ptpcoll_test_all_for_match(&n_pending, reqs, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!done) {
            /* progress only has to test these sends: no exchange steps and
             * no second round of messages to the extra ranks remain */
            collreq->tag = tag;
            collreq->num_reqs = n_pending;
            collreq->exchange = n_rounds;
            collreq->need_toserv_extra = 0;
            return BCOL_FN_STARTED;
        }
    }

    OMPI_FREE_LIST_RETURN_MT(&ptpcoll_module->collreqs_free, (ompi_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}
/**
 * Progress function of the recursive k-nomial barrier for ranks that take
 * part in the exchange.
 *
 * Resumes from the state saved in the collective request
 * (input_args->bcol_opaque_data): first the requests that were pending at
 * the last return are tested; then the exchange loop continues from
 * collreq->exchange; finally, if collreq->need_toserv_extra is set, a
 * zero-byte message is sent to every rank in rank_extra_sources_array.
 *
 * @param input_args  carries the collective request in bcol_opaque_data
 * @param const_args  carries the ptpcoll module in bcol_module
 *
 * @return BCOL_FN_STARTED if some request is still pending (state is saved
 *         in the collective request), BCOL_FN_COMPLETE when the barrier is
 *         finished (the request object is returned to the free list), or the
 *         error code of a failed call.
 */
static int bcol_ptpcoll_barrier_recurs_knomial_new_progress(
                bcol_function_args_t *input_args,
                struct coll_ml_function_t *const_args)
{
    /* local variable */
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    netpatterns_k_exchange_node_t *my_exchange_node =
        &ptpcoll_module->knomial_exchange_tree;
    int rc, k, tag, pair_comm_rank, exchange,
        tree_order = my_exchange_node->tree_order, num_reqs,
        n_exchange = my_exchange_node->n_exchanges, completed,
        n_extra_sources = my_exchange_node->n_extra_sources;
    ompi_communicator_t *comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
    int *extra_sources_array,
        **rank_exchanges = my_exchange_node->rank_exchanges;
    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;
    ompi_request_t **requests = collreq->requests;

    num_reqs = collreq->num_reqs;

    /* Test for completion of whatever was pending at the last return */
    completed = mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    /* Continue loop over exchange send/recv pairs */
    tag = collreq->tag;

    for (exchange = collreq->exchange; exchange < n_exchange; ++exchange) {
        /* partner k uses request slot 2k for its receive and 2k+1 for its send */
        for (k = 0; k < tree_order - 1; ++k) {
            /* rank of exchange partner within the group */
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]];

            assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1));

            /* send to partner - we will wait for completion, as send
             * completion is at the MPI level, and will not
             * incur network level completion costs
             */
            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k * 2 + 1])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));

            /* receive from partner */
            rc = MCA_PML_CALL(irecv(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        comm, &(requests[k * 2])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));
        }

        num_reqs = 2 * (tree_order - 1);

        /* Test for completion */
        completed = mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            /* remember where to resume on the next progress call */
            collreq->num_reqs = num_reqs;
            collreq->exchange = exchange + 1;
            return BCOL_FN_STARTED;
        }
    }

    /* If non power of 2, may need to send message to "extra" proc */
    if (collreq->need_toserv_extra) { /* EXCHANGE_NODE case */
        extra_sources_array = my_exchange_node->rank_extra_sources_array;

        for (k = 0; k < n_extra_sources; ++k) {
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];
            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
        }

        num_reqs = n_extra_sources;

        /* Test for completion */
        completed = mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            /* next call only has to test these sends: skip the loop and do
             * not send to the extra ranks a second time */
            collreq->num_reqs = num_reqs;
            collreq->exchange = n_exchange;
            collreq->need_toserv_extra = 0;
            return BCOL_FN_STARTED;
        }
    }

    /* The init function took the request object from collreqs_free and only
     * gives it back when it finishes without needing progress.  When the
     * barrier finishes here instead, the object has to be returned here. */
    OMPI_FREE_LIST_RETURN_MT(&ptpcoll_module->collreqs_free, (ompi_free_list_item_t *) collreq);

    return BCOL_FN_COMPLETE;
}