int Coll_scatter_mvapich2_two_level_binomial::scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, int recvcnt, MPI_Datatype recvtype, int root, MPI_Comm comm) { int comm_size, rank; int local_rank, local_size; int leader_comm_rank = -1, leader_comm_size = -1; int mpi_errno = MPI_SUCCESS; int recvtype_size, sendtype_size, nbytes; void *tmp_buf = NULL; void *leader_scatter_buf = NULL; MPI_Status status; int leader_root = -1, leader_of_root = -1; MPI_Comm shmem_comm, leader_comm; //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Scatter_intra_function==NULL) MV2_Scatter_intra_function=Coll_scatter_mpich::scatter; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); } comm_size = comm->size(); rank = comm->rank(); if (((rank == root) && (recvcnt == 0)) || ((rank != root) && (sendcnt == 0))) { return MPI_SUCCESS; } /* extract the rank,size information for the intra-node * communicator */ shmem_comm = comm->get_intra_comm(); local_rank = shmem_comm->rank(); local_size = shmem_comm->size(); if (local_rank == 0) { /* Node leader. Extract the rank, size information for the leader * communicator */ leader_comm = comm->get_leaders_comm(); leader_comm_size = leader_comm->size(); leader_comm_rank = leader_comm->rank(); } if (local_size == comm_size) { /* purely intra-node scatter. Just use the direct algorithm and we are done */ mpi_errno = MPIR_Scatter_MV2_Direct(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm); } else { recvtype_size=recvtype->size(); sendtype_size=sendtype->size(); if (rank == root) { nbytes = sendcnt * sendtype_size; } else { nbytes = recvcnt * recvtype_size; } if (local_rank == 0) { /* Node leader, allocate tmp_buffer */ tmp_buf = smpi_get_tmp_sendbuffer(nbytes * local_size); } leader_comm = comm->get_leaders_comm(); int* leaders_map = comm->get_leaders_map(); leader_of_root = comm->group()->rank(leaders_map[root]); leader_root = leader_comm->group()->rank(leaders_map[root]); /* leader_root is the rank of the leader of the root in leader_comm. * leader_root is to be used as the root of the inter-leader gather ops */ if ((local_rank == 0) && (root != rank) && (leader_of_root == rank)) { /* The root of the scatter operation is not the node leader. Recv * data from the node leader */ leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size); Request::recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE, root, COLL_TAG_SCATTER, comm, &status); } if (rank == root && local_rank != 0) { /* The root of the scatter operation is not the node leader. Send * data to the node leader */ Request::send(sendbuf, sendcnt * comm_size, sendtype, leader_of_root, COLL_TAG_SCATTER, comm); } if (leader_comm_size > 1 && local_rank == 0) { if (not comm->is_uniform()) { int* displs = NULL; int* sendcnts = NULL; int* node_sizes; int i = 0; node_sizes = comm->get_non_uniform_map(); if (root != leader_of_root) { if (leader_comm_rank == leader_root) { displs = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size)); sendcnts = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size)); sendcnts[0] = node_sizes[0] * nbytes; displs[0] = 0; for (i = 1; i < leader_comm_size; i++) { displs[i] = displs[i - 1] + node_sizes[i - 1] * nbytes; sendcnts[i] = node_sizes[i] * nbytes; } } Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { if (leader_comm_rank == leader_root) { displs = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size)); sendcnts = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size)); sendcnts[0] = node_sizes[0] * sendcnt; displs[0] = 0; for (i = 1; i < leader_comm_size; i++) { displs[i] = displs[i - 1] + node_sizes[i - 1] * sendcnt; sendcnts[i] = node_sizes[i] * sendcnt; } } Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } if (leader_comm_rank == leader_root) { xbt_free(displs); xbt_free(sendcnts); } } else { if (leader_of_root != root) { mpi_errno = MPIR_Scatter_MV2_Binomial(leader_scatter_buf, nbytes * local_size, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { mpi_errno = MPIR_Scatter_MV2_Binomial(sendbuf, sendcnt * local_size, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } } } /* The leaders are now done with the inter-leader part. Scatter the data within the nodes */ if (rank == root && recvbuf == MPI_IN_PLACE) { mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE, (void *)sendbuf, sendcnt, sendtype, 0, shmem_comm); } else { mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE, recvbuf, recvcnt, recvtype, 0, shmem_comm); } } /* check if multiple threads are calling this collective function */ if (comm_size != local_size && local_rank == 0) { smpi_free_tmp_buffer(tmp_buf); if (leader_of_root == rank && root != rank) { smpi_free_tmp_buffer(leader_scatter_buf); } } return (mpi_errno); }
int Coll_scatter_mvapich2::scatter(const void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, int recvcnt, MPI_Datatype recvtype, int root, MPI_Comm comm) { int range = 0, range_threshold = 0, range_threshold_intra = 0; int mpi_errno = MPI_SUCCESS; // int mpi_errno_ret = MPI_SUCCESS; int rank, nbytes, comm_size; int partial_sub_ok = 0; int conf_index = 0; MPI_Comm shmem_comm; // MPID_Comm *shmem_commptr=NULL; if(mv2_scatter_thresholds_table==NULL) init_mv2_scatter_tables_stampede(); if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); } comm_size = comm->size(); rank = comm->rank(); if (rank == root) { int sendtype_size = sendtype->size(); nbytes = sendcnt * sendtype_size; } else { int recvtype_size = recvtype->size(); nbytes = recvcnt * recvtype_size; } // check if safe to use partial subscription mode if (comm->is_uniform()) { shmem_comm = comm->get_intra_comm(); if (mv2_scatter_table_ppn_conf[0] == -1) { // Indicating user defined tuning conf_index = 0; }else{ int local_size = shmem_comm->size(); int i = 0; do { if (local_size == mv2_scatter_table_ppn_conf[i]) { conf_index = i; partial_sub_ok = 1; break; } i++; } while(i < mv2_scatter_num_ppn_conf); } } if (partial_sub_ok != 1) { conf_index = 0; } /* Search for the corresponding system size inside the tuning table */ while ((range < (mv2_size_scatter_tuning_table[conf_index] - 1)) && (comm_size > mv2_scatter_thresholds_table[conf_index][range].numproc)) { range++; } /* Search for corresponding inter-leader function */ while ((range_threshold < (mv2_scatter_thresholds_table[conf_index][range].size_inter_table - 1)) && (nbytes > mv2_scatter_thresholds_table[conf_index][range].inter_leader[range_threshold].max) && (mv2_scatter_thresholds_table[conf_index][range].inter_leader[range_threshold].max != -1)) { range_threshold++; } /* Search for corresponding intra-node function */ while ((range_threshold_intra < (mv2_scatter_thresholds_table[conf_index][range].size_intra_table - 1)) && (nbytes > mv2_scatter_thresholds_table[conf_index][range].intra_node[range_threshold_intra].max) && (mv2_scatter_thresholds_table[conf_index][range].intra_node[range_threshold_intra].max != -1)) { range_threshold_intra++; } MV2_Scatter_function = mv2_scatter_thresholds_table[conf_index][range].inter_leader[range_threshold] .MV2_pt_Scatter_function; if(MV2_Scatter_function == &MPIR_Scatter_mcst_wrap_MV2) { #if defined(_MCST_SUPPORT_) if(comm->ch.is_mcast_ok == 1 && mv2_use_mcast_scatter == 1 && comm->ch.shmem_coll_ok == 1) { MV2_Scatter_function = &MPIR_Scatter_mcst_MV2; } else #endif /*#if defined(_MCST_SUPPORT_) */ { if(mv2_scatter_thresholds_table[conf_index][range].inter_leader[range_threshold + 1]. MV2_pt_Scatter_function != NULL) { MV2_Scatter_function = mv2_scatter_thresholds_table[conf_index][range].inter_leader[range_threshold + 1] .MV2_pt_Scatter_function; } else { /* Fallback! */ MV2_Scatter_function = &MPIR_Scatter_MV2_Binomial; } } } if( (MV2_Scatter_function == &MPIR_Scatter_MV2_two_level_Direct) || (MV2_Scatter_function == &MPIR_Scatter_MV2_two_level_Binomial)) { if( comm->is_blocked()) { MV2_Scatter_intra_function = mv2_scatter_thresholds_table[conf_index][range].intra_node[range_threshold_intra] .MV2_pt_Scatter_function; mpi_errno = MV2_Scatter_function(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm); } else { mpi_errno = MPIR_Scatter_MV2_Binomial(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm); } } else { mpi_errno = MV2_Scatter_function(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm); } return (mpi_errno); }