/* ****************************************************************** */
/**
 * Allgather built on top of a single in-place allreduce.
 *
 * The receive buffer (after applying recv_true_lb) is treated as
 * recv_size bytes.  Everything except this rank's own send_size-byte
 * slot is zeroed, the local contribution is copied into that slot
 * (unless sendbuf is MPI_IN_PLACE), and the whole buffer is reduced as
 * MPI_INT words with MPI_BOR.
 *
 * sendcount, sendtype, recvcount and recvtype are not read here.
 *
 * \return the return code of MPIDO_Allreduce
 */
int MPIDO_Allgather_allreduce(const void *sendbuf,
                              int sendcount,
                              MPI_Datatype sendtype,
                              void *recvbuf,
                              int recvcount,
                              MPI_Datatype recvtype,
                              MPI_Aint send_true_lb,
                              MPI_Aint recv_true_lb,
                              size_t send_size,
                              size_t recv_size,
                              MPID_Comm * comm_ptr,
                              int *mpierrno)
{
  const int my_rank = comm_ptr->rank;

  /* Byte counts on either side of this rank's slot. */
  const size_t bytes_before = my_rank * send_size;
  const size_t bytes_after = recv_size - (my_rank + 1) * send_size;

  char *const gather_base = (char *) recvbuf + recv_true_lb;
  char *const my_slot = gather_base + bytes_before;

  /* Clear every slot that belongs to another rank. */
  memset(gather_base, 0, bytes_before);
  memset(my_slot + send_size, 0, bytes_after);

  /* With MPI_IN_PLACE the contribution is already sitting in my_slot. */
  if (sendbuf != MPI_IN_PLACE)
    memcpy(my_slot, (const char *) sendbuf + send_true_lb, send_size);

  /* TODO: Change to PAMI */
  return MPIDO_Allreduce(MPI_IN_PLACE,
                         gather_base,
                         recv_size / sizeof(int),
                         MPI_INT,
                         MPI_BOR,
                         comm_ptr,
                         mpierrno);
}
/**
 * Scatter entry point: selects between the MPICH implementation, the
 * bcast-based scatter and the STAR-tuned scatter.
 *
 * Flow, as implemented below:
 *  1. Each rank checks that the datatype(s) significant to it are usable
 *     and contiguous, recording the outcome in `success`.
 *  2. If the communicator properties request it (or the message is small
 *     and tree bcast is unavailable), MPIR_Scatter_intra is used directly.
 *  3. `success` is combined across the communicator with MPI_BAND; if any
 *     rank failed its check, every rank uses MPIR_Scatter_intra.
 *  4. Otherwise either MPIDO_Scatter_bcast or STAR_Scatter is used, with
 *     MPIR_Scatter_intra as the fallback.
 *
 * Changes relative to the previous version:
 *  - The datatype guards now test the arguments that are actually passed
 *    to MPIDI_Datatype_get_info.  The root validates sendtype/sendcount
 *    before querying the send type, and non-root ranks validate
 *    recvtype/recvcount (sendtype/sendcount are not significant there).
 *  - When STAR tuning is not used and MPIDO_USE_BCAST_SCATTER is not set,
 *    the function used to return 0 without moving any data; it now falls
 *    back to MPIR_Scatter_intra.
 *  - The callsite agreement exchanged two `int`s as MPI_UNSIGNED_LONG;
 *    the buffers are now really `unsigned long`.
 *
 * \return the return code of the selected scatter implementation
 */
int MPIDO_Scatter(void *sendbuf,
                  int sendcount,
                  MPI_Datatype sendtype,
                  void *recvbuf,
                  int recvcount,
                  MPI_Datatype recvtype,
                  int root,
                  MPID_Comm * comm)
{
  MPIDO_Embedded_Info_Set * properties = &(comm->dcmf.properties);
  MPID_Datatype * data_ptr;
  MPI_Aint true_lb = 0;
  char *sbuf = sendbuf, *rbuf = recvbuf;
  int contig, nbytes = 0, rc = 0;
  int rank = comm->rank;
  int success = 1;

  if (rank == root)
  {
    /* The send side is only significant at the root. */
    if (sendtype != MPI_DATATYPE_NULL && sendcount >= 0)
    {
      MPIDI_Datatype_get_info(sendcount, sendtype, contig,
                              nbytes, data_ptr, true_lb);
      if (!contig)
        success = 0;
    }
    else
      success = 0;

    if (success)
    {
      if (recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
      {
        MPIDI_Datatype_get_info(recvcount, recvtype, contig,
                                nbytes, data_ptr, true_lb);
        if (!contig)
          success = 0;
      }
      else
        success = 0;
    }
  }
  else
  {
    /* Non-root ranks only have a meaningful receive description. */
    if (recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
    {
      MPIDI_Datatype_get_info(recvcount, recvtype, contig,
                              nbytes, data_ptr, true_lb);
      if (!contig)
        success = 0;
    }
    else
      success = 0;
  }

  if (MPIDO_INFO_ISSET(properties, MPIDO_USE_MPICH_SCATTER) ||
      MPIDO_INFO_ISSET(properties, MPIDO_IRREG_COMM) ||
      (!MPIDO_INFO_ISSET(properties, MPIDO_USE_TREE_BCAST) && nbytes <= 64))
  {
    comm->dcmf.last_algorithm = MPIDO_USE_MPICH_SCATTER;
    return MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);
  }

  /* set the internal control flow to disable internal star tuning */
  STAR_info.internal_control_flow = 1;

  /* Every rank must agree before an optimized algorithm may be used. */
  MPIDO_Allreduce(MPI_IN_PLACE, &success, 1, MPI_INT, MPI_BAND, comm);

  /* reset flag */
  STAR_info.internal_control_flow = 0;

  if (!success)
    return MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);

  /* NOTE(review): both calls use the true_lb left by the last
   * MPIDI_Datatype_get_info above; kept as in the original. */
  MPIDI_VerifyBuffer(sendbuf, sbuf, true_lb);
  MPIDI_VerifyBuffer(recvbuf, rbuf, true_lb);

  if (!STAR_info.enabled || STAR_info.internal_control_flow ||
      STAR_info.scatter_algorithms == 1)
  {
    if (MPIDO_INFO_ISSET(properties, MPIDO_USE_BCAST_SCATTER))
    {
      comm->dcmf.last_algorithm = MPIDO_USE_BCAST_SCATTER;
      return MPIDO_Scatter_bcast(sbuf, sendcount, sendtype,
                                 rbuf, recvcount, recvtype,
                                 root, comm);
    }

    /* No optimized algorithm is enabled: the scatter still has to be
     * performed, so use the MPICH implementation instead of returning
     * success without moving data. */
    comm->dcmf.last_algorithm = MPIDO_USE_MPICH_SCATTER;
    return MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);
  }
  else
  {
    int id;
    unsigned char same_callsite = 1;

    /* NOTE(review): the allocation result is not checked, as in the
     * original.  Bailing out on one rank only would desynchronize the
     * collective calls below, so a fix needs a communicator-wide policy. */
    void ** tb_ptr = (void **)
      MPIU_Malloc(sizeof(void *) * STAR_info.traceback_levels);

    /* set the internal control flow to disable internal star tuning */
    STAR_info.internal_control_flow = 1;

    /* get backtrace info for caller to this func, use that as callsite_id */
    backtrace(tb_ptr, STAR_info.traceback_levels);
    id = (int) tb_ptr[STAR_info.traceback_levels - 1];

    /* find out if all participants agree on the callsite id */
    if (STAR_info.agree_on_callsite)
    {
      /* MAX(x) and MAX(~x) are complements of each other only when every
       * rank contributed the same x.  The buffers must really be
       * unsigned long to match MPI_UNSIGNED_LONG. */
      unsigned long tmp[2], result[2];
      tmp[0] = (unsigned long) id;
      tmp[1] = ~tmp[0];
      MPIDO_Allreduce(tmp, result, 2, MPI_UNSIGNED_LONG, MPI_MAX, comm);
      if (result[0] != (~result[1]))
        same_callsite = 0;
    }

    if (same_callsite)
    {
      STAR_Callsite collective_site;

      /* create a signature callsite info for this particular call site */
      collective_site.call_type = SCATTER_CALL;
      collective_site.comm = comm;
      collective_site.bytes = nbytes;
      collective_site.op_type_support = MPIDO_SUPPORT_NOT_NEEDED;
      collective_site.id = id;

      rc = STAR_Scatter(sbuf, sendcount, sendtype,
                        rbuf, recvcount, recvtype,
                        root, &collective_site,
                        STAR_scatter_repository,
                        STAR_info.scatter_algorithms);
    }

    if (rc == STAR_FAILURE || !same_callsite)
      rc = MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);

    /* unset the internal control flow */
    STAR_info.internal_control_flow = 0;

    MPIU_Free(tb_ptr);
  }

  return rc;
}
int MPIDO_Allgather_allreduce(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Aint send_true_lb, MPI_Aint recv_true_lb, size_t send_size, size_t recv_size, MPID_Comm * comm_ptr, int *mpierrno) { int rc, i; char *startbuf = NULL; char *destbuf = NULL; const int rank = comm_ptr->rank; startbuf = (char *) recvbuf + recv_true_lb; destbuf = startbuf + rank * send_size; if (sendbuf != MPI_IN_PLACE) { char *outputbuf = (char *) sendbuf + send_true_lb; memcpy(destbuf, outputbuf, send_size); } /* TODO: Change to PAMI */ /*Do a convert and then do the allreudce*/ if ( recv_size <= MAX_ALLGATHER_ALLREDUCE_BUFFER_SIZE && (send_size & 0x3)==0 && /*integer/long allgathers only*/ (sendtype != MPI_DOUBLE || recvtype != MPI_DOUBLE)) { double *tmprbuf = (double *)MPL_malloc(recv_size*2); if (tmprbuf == NULL) goto direct_algo; /*skip int to fp conversion and go to direct algo*/ double *tmpsbuf = tmprbuf + (rank*send_size)/sizeof(int); int *sibuf = (int *) destbuf; memset(tmprbuf, 0, rank*send_size*2); memset(tmpsbuf + send_size/sizeof(int), 0, (recv_size - (rank + 1)*send_size)*2); for(i = 0; i < (send_size/sizeof(int)); ++i) tmpsbuf[i] = (double)sibuf[i]; /* Switch to comm->coll_fns->fn() */ rc = MPIDO_Allreduce(MPI_IN_PLACE, tmprbuf, recv_size/sizeof(int), MPI_DOUBLE, MPI_SUM, comm_ptr, mpierrno); sibuf = (int *) startbuf; for(i = 0; i < (rank*send_size/sizeof(int)); ++i) sibuf[i] = (int)tmprbuf[i]; for(i = (rank+1)*send_size/sizeof(int); i < recv_size/sizeof(int); ++i) sibuf[i] = (int)tmprbuf[i]; MPL_free(tmprbuf); return rc; } direct_algo: memset(startbuf, 0, rank * send_size); memset(destbuf + send_size, 0, recv_size - (rank + 1) * send_size); if (sendtype == MPI_DOUBLE && recvtype == MPI_DOUBLE) /* Switch to comm->coll_fns->fn() */ rc = MPIDO_Allreduce(MPI_IN_PLACE, startbuf, recv_size/sizeof(double), MPI_DOUBLE, MPI_SUM, comm_ptr, mpierrno); else /* Switch to comm->coll_fns->fn() */ rc = 
MPIDO_Allreduce(MPI_IN_PLACE, startbuf, recv_size/sizeof(int), MPI_UNSIGNED, MPI_BOR, comm_ptr, mpierrno); return rc; }
int MPIDO_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, int *mpierrno) { #ifndef HAVE_PAMI_IN_PLACE if (sendbuf == MPI_IN_PLACE) { MPID_Abort (NULL, 0, 1, "'MPI_IN_PLACE' requries support for `PAMI_IN_PLACE`"); return -1; } #endif MPID_Datatype *dt_null = NULL; MPI_Aint true_lb = 0; int dt_contig ATTRIBUTE((unused)), tsize; int mu; char *sbuf, *rbuf; pami_data_function pop; pami_type_t pdt; int rc; int alg_selected = 0; const int rank = comm_ptr->rank; #if ASSERT_LEVEL==0 /* We can't afford the tracing in ndebug/performance libraries */ const unsigned verbose = 0; #else const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0); #endif const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid); const int selected_type = mpid->user_selected_type[PAMI_XFER_REDUCE]; rc = MPIDI_Datatype_to_pami(datatype, &pdt, op, &pop, &mu); if(unlikely(verbose)) fprintf(stderr,"reduce - rc %u, root %u, count %d, dt: %p, op: %p, mu: %u, selectedvar %u != %u (MPICH) sendbuf %p, recvbuf %p\n", rc, root, count, pdt, pop, mu, (unsigned)selected_type, MPID_COLL_USE_MPICH,sendbuf, recvbuf); pami_xfer_t reduce; pami_algorithm_t my_reduce=0; const pami_metadata_t *my_md = (pami_metadata_t *)NULL; int queryreq = 0; volatile unsigned reduce_active = 1; MPIDI_Datatype_get_info(count, datatype, dt_contig, tsize, dt_null, true_lb); rbuf = (char *)recvbuf + true_lb; sbuf = (char *)sendbuf + true_lb; if(sendbuf == MPI_IN_PLACE) { if(unlikely(verbose)) fprintf(stderr,"reduce MPI_IN_PLACE send buffering (%d,%d)\n",count,tsize); sbuf = PAMI_IN_PLACE; } reduce.cb_done = reduce_cb_done; reduce.cookie = (void *)&reduce_active; if(mpid->optreduce) /* GLUE_ALLREDUCE */ { char* tbuf = NULL; if(unlikely(verbose)) fprintf(stderr,"Using protocol GLUE_ALLREDUCE for reduce (%d,%d)\n",count,tsize); MPIDI_Update_last_algorithm(comm_ptr, "REDUCE_OPT_ALLREDUCE"); void *destbuf = recvbuf; if(rank != root) /* temp 
buffer for non-root destbuf */ { tbuf = destbuf = MPL_malloc(tsize); } /* Switch to comm->coll_fns->fn() */ MPIDO_Allreduce(sendbuf, destbuf, count, datatype, op, comm_ptr, mpierrno); if(tbuf) MPL_free(tbuf); return 0; } if(selected_type == MPID_COLL_USE_MPICH || rc != MPI_SUCCESS) { if(unlikely(verbose)) fprintf(stderr,"Using MPICH reduce algorithm\n"); #if CUDA_AWARE_SUPPORT if(MPIDI_Process.cuda_aware_support_on) { MPI_Aint dt_extent; MPID_Datatype_get_extent_macro(datatype, dt_extent); char *scbuf = NULL; char *rcbuf = NULL; int is_send_dev_buf = MPIDI_cuda_is_device_buf(sendbuf); int is_recv_dev_buf = MPIDI_cuda_is_device_buf(recvbuf); if(is_send_dev_buf) { scbuf = MPL_malloc(dt_extent * count); cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost); if (cudaSuccess != cudaerr) fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr)); } else scbuf = sendbuf; if(is_recv_dev_buf) { rcbuf = MPL_malloc(dt_extent * count); if(sendbuf == MPI_IN_PLACE) { cudaError_t cudaerr = CudaMemcpy(rcbuf, recvbuf, dt_extent * count, cudaMemcpyDeviceToHost); if (cudaSuccess != cudaerr) fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr)); } else memset(rcbuf, 0, dt_extent * count); } else rcbuf = recvbuf; int cuda_res = MPIR_Reduce(scbuf, rcbuf, count, datatype, op, root, comm_ptr, mpierrno); if(is_send_dev_buf)MPL_free(scbuf); if(is_recv_dev_buf) { cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice); if (cudaSuccess != cudaerr) fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr)); MPL_free(rcbuf); } return cuda_res; } else #endif return MPIR_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, mpierrno); } if(selected_type == MPID_COLL_OPTIMIZED) { if((mpid->cutoff_size[PAMI_XFER_REDUCE][0] == 0) || (mpid->cutoff_size[PAMI_XFER_REDUCE][0] >= tsize && mpid->cutoff_size[PAMI_XFER_REDUCE][0] > 0)) { TRACE_ERR("Optimized Reduce (%s) was 
pre-selected\n", mpid->opt_protocol_md[PAMI_XFER_REDUCE][0].name); my_reduce = mpid->opt_protocol[PAMI_XFER_REDUCE][0]; my_md = &mpid->opt_protocol_md[PAMI_XFER_REDUCE][0]; queryreq = mpid->must_query[PAMI_XFER_REDUCE][0]; } } else { TRACE_ERR("Optimized reduce (%s) was specified by user\n", mpid->user_metadata[PAMI_XFER_REDUCE].name); my_reduce = mpid->user_selected[PAMI_XFER_REDUCE]; my_md = &mpid->user_metadata[PAMI_XFER_REDUCE]; queryreq = selected_type; } reduce.algorithm = my_reduce; reduce.cmd.xfer_reduce.sndbuf = sbuf; reduce.cmd.xfer_reduce.rcvbuf = rbuf; reduce.cmd.xfer_reduce.stype = pdt; reduce.cmd.xfer_reduce.rtype = pdt; reduce.cmd.xfer_reduce.stypecount = count; reduce.cmd.xfer_reduce.rtypecount = count; reduce.cmd.xfer_reduce.op = pop; reduce.cmd.xfer_reduce.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0); if(unlikely(queryreq == MPID_COLL_ALWAYS_QUERY || queryreq == MPID_COLL_CHECK_FN_REQUIRED)) { metadata_result_t result = {0}; TRACE_ERR("Querying reduce protocol %s, type was %d\n", my_md->name, queryreq); if(my_md->check_fn == NULL) { /* process metadata bits */ if((!my_md->check_correct.values.inplace) && (sendbuf == MPI_IN_PLACE)) result.check.unspecified = 1; if(my_md->check_correct.values.rangeminmax) { MPI_Aint data_true_lb ATTRIBUTE((unused)); MPID_Datatype *data_ptr; int data_size, data_contig ATTRIBUTE((unused)); MPIDI_Datatype_get_info(count, datatype, data_contig, data_size, data_ptr, data_true_lb); if((my_md->range_lo <= data_size) && (my_md->range_hi >= data_size)) ; /* ok, algorithm selected */ else { result.check.range = 1; if(unlikely(verbose)) { fprintf(stderr,"message size (%u) outside range (%zu<->%zu) for %s.\n", data_size, my_md->range_lo, my_md->range_hi, my_md->name); } } } }
/**
 * Allgatherv entry point: inspects the buffers and the communicator
 * properties and dispatches to one of the optimized allgatherv
 * algorithms, to the STAR-tuned implementation, or to
 * MPIR_Allgatherv_intra.
 *
 * Flow, as implemented below:
 *  1. If MPIDO_USE_MPICH_ALLGATHERV is set, use MPIR_Allgatherv_intra.
 *  2. Query the receive type (count 1) and, unless sendbuf is
 *     MPI_IN_PLACE, the send type; record contiguity in `config`.
 *  3. Scan recvcounts/displs to decide whether the receive layout is one
 *     continuous block, and compute buffer_sum, the total number of bytes
 *     gathered.
 *  4. Optionally (MPIDO_USE_PREALLREDUCE_ALLGATHERV) AND `config` across
 *     the communicator.
 *  5. Pick an algorithm: forced by environment, by message-size
 *     heuristics, or through STAR when STAR tuning applies.
 *
 * Changes relative to the previous version:
 *  - The contiguity scan used to `break` on the first mismatch, which left
 *    buffer_sum holding a partial sum plus the last count.  That value
 *    feeds msize, the selection thresholds, the STAR callsite byte count
 *    and the total passed to the algorithm.  The scan now always visits
 *    every count so buffer_sum is the true total.
 *  - The alignment test casts the pointers to unsigned long instead of
 *    unsigned, avoiding a narrowing pointer cast on LP64 targets (the low
 *    four bits tested are the same).
 *
 * \return the return code of the selected allgatherv implementation
 */
int MPIDO_Allgatherv(void *sendbuf,
                     int sendcount,
                     MPI_Datatype sendtype,
                     void *recvbuf,
                     int *recvcounts,
                     int *displs,
                     MPI_Datatype recvtype,
                     MPID_Comm * comm)
{
  /* function pointer to be used to point to the appropriate algorithm */
  allgatherv_fptr func = NULL;

  /* Check the nature of the buffers */
  MPID_Datatype *dt_null = NULL;
  MPI_Aint send_true_lb = 0;
  MPI_Aint recv_true_lb = 0;
  size_t send_size = 0;
  size_t recv_size = 0;
  MPIDO_Coll_config config = {1,1,1,1,1};
  double msize;

  int i, rc, buffer_sum = 0, np = comm->local_size;
  char use_tree_reduce, use_alltoall, use_rect_async, use_bcast;
  char *sbuf, *rbuf;

  MPIDO_Embedded_Info_Set * comm_prop = &(comm->dcmf.properties);
  MPIDO_Embedded_Info_Set * coll_prop = &MPIDI_CollectiveProtocols.properties;

  unsigned char userenvset = MPIDO_INFO_ISSET(comm_prop,
                                              MPIDO_ALLGATHERV_ENVVAR);

  if (MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_MPICH_ALLGATHERV))
  {
    comm->dcmf.last_algorithm = MPIDO_USE_MPICH_ALLGATHERV;
    return MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
                                 recvbuf, recvcounts, displs, recvtype,
                                 comm);
  }

  /* recv_size is the size of ONE recvtype element. */
  MPIDI_Datatype_get_info(1, recvtype, config.recv_contig,
                          recv_size, dt_null, recv_true_lb);

  /* With MPI_IN_PLACE the send description is not queried: send_size
   * stays 0 and config.send_contig keeps its initial value. */
  if (sendbuf != MPI_IN_PLACE)
  {
    MPIDI_Datatype_get_info(sendcount, sendtype, config.send_contig,
                            send_size, dt_null, send_true_lb);
    MPIDI_VerifyBuffer(sendbuf, sbuf, send_true_lb);
  }

  /* The layout is continuous only if it starts at 0 and every
   * displacement equals the running sum of the preceding counts. */
  if (displs[0])
    config.recv_continuous = 0;

  for (i = 1; i < np; i++)
  {
    buffer_sum += recvcounts[i - 1];
    /* Do not stop at the first mismatch: buffer_sum must end up as the
     * sum of ALL counts, whatever the layout. */
    if (buffer_sum != displs[i])
      config.recv_continuous = 0;
  }

  buffer_sum += recvcounts[np - 1];
  buffer_sum *= recv_size;           /* elements -> bytes */
  msize = (double)buffer_sum / (double)np;

  MPIDI_VerifyBuffer(recvbuf, rbuf, (recv_true_lb + buffer_sum));

  if (MPIDO_INFO_ISSET(coll_prop, MPIDO_USE_PREALLREDUCE_ALLGATHERV))
  {
    /* NOTE(review): this reduces `config` as 5 MPI_INTs, so it presumably
     * relies on MPIDO_Coll_config being five ints -- confirm. */
    STAR_info.internal_control_flow = 1;
    MPIDO_Allreduce(MPI_IN_PLACE, &config, 5, MPI_INT, MPI_BAND, comm);
    STAR_info.internal_control_flow = 0;
  }

  if (!STAR_info.enabled || STAR_info.internal_control_flow ||
      msize < STAR_info.allgather_threshold)
  {
    use_tree_reduce =
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_TREE_ALLREDUCE) &&
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_ALLREDUCE_ALLGATHERV) &&
      config.recv_contig && config.send_contig &&
      config.recv_continuous && buffer_sum % sizeof(int) == 0;

    use_alltoall =
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_TORUS_ALLTOALL) &&
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_ALLTOALL_ALLGATHERV) &&
      config.recv_contig && config.send_contig;

    use_rect_async =
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_ARECT_BCAST_ALLGATHERV) &&
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_ARECT_BCAST) &&
      config.recv_contig && config.send_contig;

    /* A MPIDO_USE_TREE_BCAST requirement was disabled here upstream. */
    use_bcast =
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_BCAST_ALLGATHERV);

    if (userenvset)
    {
      /* Forced selection.  When several algorithms are enabled the
       * priority is rect_async > alltoall > allreduce > bcast. */
      if (use_rect_async)
      {
        func = MPIDO_Allgatherv_bcast_rect_async;
        comm->dcmf.last_algorithm = MPIDO_USE_ARECT_BCAST_ALLGATHERV;
      }
      else if (use_alltoall)
      {
        func = MPIDO_Allgatherv_alltoall;
        comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
      }
      else if (use_tree_reduce)
      {
        func = MPIDO_Allgatherv_allreduce;
        comm->dcmf.last_algorithm = MPIDO_USE_ALLREDUCE_ALLGATHERV;
      }
      else if (use_bcast)
      {
        func = MPIDO_Allgatherv_bcast;
        comm->dcmf.last_algorithm = MPIDO_USE_BCAST_ALLGATHERV;
      }
    }
    else if (!MPIDO_INFO_ISSET(comm_prop, MPIDO_IRREG_COMM))
    {
      /* Regular communicator: first matching rule wins. */
      if (np <= 512)
      {
        if (use_tree_reduce && msize < 128 * np)
        {
          func = MPIDO_Allgatherv_allreduce;
          comm->dcmf.last_algorithm = MPIDO_USE_ALLREDUCE_ALLGATHERV;
        }
        else if (use_bcast && msize >= 128 * np)
        {
          func = MPIDO_Allgatherv_bcast;
          comm->dcmf.last_algorithm = MPIDO_USE_BCAST_ALLGATHERV;
        }
        else if (use_alltoall && msize > 128 && msize <= 8*np)
        {
          func = MPIDO_Allgatherv_alltoall;
          comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
        }
        else if (use_rect_async && msize > 8*np)
        {
          func = MPIDO_Allgatherv_bcast_rect_async;
          comm->dcmf.last_algorithm = MPIDO_USE_ARECT_BCAST_ALLGATHERV;
        }
      }
      else
      {
        if (use_tree_reduce && msize < 512)
        {
          func = MPIDO_Allgatherv_allreduce;
          comm->dcmf.last_algorithm = MPIDO_USE_ALLREDUCE_ALLGATHERV;
        }
        else if (use_alltoall &&
                 msize > 128 * (512.0 / (float) np) && msize <= 128)
        {
          func = MPIDO_Allgatherv_alltoall;
          comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
        }
        else if (use_rect_async && msize >= 512 && msize <= 65536)
        {
          func = MPIDO_Allgatherv_bcast_rect_async;
          comm->dcmf.last_algorithm = MPIDO_USE_ARECT_BCAST_ALLGATHERV;
        }
        else if (use_bcast && msize > 65536)
        {
          func = MPIDO_Allgatherv_bcast;
          comm->dcmf.last_algorithm = MPIDO_USE_BCAST_ALLGATHERV;
        }
      }
    }
    else
    {
      /* Irregular communicator: only alltoall is considered. */
      if (msize >= 64 && use_alltoall)
      {
        func = MPIDO_Allgatherv_alltoall;
        comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
      }
    }

    if (!func)
    {
      comm->dcmf.last_algorithm = MPIDO_USE_MPICH_ALLGATHERV;
      return MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
                                   recvbuf, recvcounts, displs, recvtype,
                                   comm);
    }

    rc = (func)(sendbuf, sendcount, sendtype,
                recvbuf, recvcounts, buffer_sum, displs, recvtype,
                send_true_lb, recv_true_lb, send_size, recv_size,
                comm);
  }
  else
  {
    STAR_Callsite collective_site;

    /* NOTE(review): the allocation result is not checked, as in the
     * original. */
    void ** tb_ptr = (void **)
      MPIU_Malloc(sizeof(void *) * STAR_info.traceback_levels);

    /* set the internal control flow to disable internal star tuning */
    STAR_info.internal_control_flow = 1;

    /* get backtrace info for caller to this func, use that as callsite_id */
    backtrace(tb_ptr, STAR_info.traceback_levels);

    /* create a signature callsite info for this particular call site */
    collective_site.call_type = ALLGATHERV_CALL;
    collective_site.comm = comm;
    collective_site.bytes = buffer_sum;
    collective_site.op_type_support = MPIDO_SUPPORT_NOT_NEEDED;
    collective_site.buff_attributes[0] = config.send_contig;
    collective_site.buff_attributes[1] = config.recv_contig;
    collective_site.buff_attributes[2] = config.recv_continuous;

    /* decide buffer alignment: aligned only if both buffers sit on a
     * 16-byte boundary */
    collective_site.buff_attributes[3] = 1; /* assume aligned */
    if (((unsigned long)sendbuf & 0x0F) || ((unsigned long)recvbuf & 0x0F))
      collective_site.buff_attributes[3] = 0; /* set to not aligned */

    collective_site.id = (int) tb_ptr[STAR_info.traceback_levels - 1];

    rc = STAR_Allgatherv(sendbuf, sendcount, sendtype,
                         recvbuf, recvcounts, buffer_sum, displs, recvtype,
                         send_true_lb, recv_true_lb, send_size, recv_size,
                         &collective_site,
                         STAR_allgatherv_repository,
                         STAR_info.allgatherv_algorithms);

    /* unset the internal control flow */
    STAR_info.internal_control_flow = 0;

    if (rc == STAR_FAILURE)
      rc = MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
                                 recvbuf, recvcounts, displs, recvtype,
                                 comm);

    MPIU_Free(tb_ptr);
  }

  return rc;
}
int MPIDO_Allgatherv_allreduce(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, int buffer_sum, const int *displs, MPI_Datatype recvtype, MPI_Aint send_true_lb, MPI_Aint recv_true_lb, size_t send_size, size_t recv_size, MPID_Comm * comm_ptr, int *mpierrno) { int start, rc, i; int length; char *startbuf = NULL; char *destbuf = NULL; const int rank = comm_ptr->rank; TRACE_ERR("Entering MPIDO_Allgatherv_allreduce\n"); startbuf = (char *) recvbuf + recv_true_lb; destbuf = startbuf + displs[rank] * recv_size; if (sendbuf != MPI_IN_PLACE) { char *outputbuf = (char *) sendbuf + send_true_lb; memcpy(destbuf, outputbuf, send_size); } //printf("buffer_sum %d, send_size %d recv_size %d\n", buffer_sum, // (int)send_size, (int)recv_size); /* TODO: Change to PAMI */ /*integer/long/double allgathers only*/ /*Do a convert and then do the allreudce*/ if ( buffer_sum <= MAX_ALLGATHERV_ALLREDUCE_BUFFER_SIZE && (send_size & 0x3)==0 && (recv_size & 0x3)==0) { double *tmprbuf = (double *)MPIU_Malloc(buffer_sum*2); if (tmprbuf == NULL) goto direct_algo; /*skip int to fp conversion and go to direct algo*/ double *tmpsbuf = tmprbuf + (displs[rank]*recv_size)/sizeof(int); int *sibuf = (int *) destbuf; memset(tmprbuf, 0, displs[rank]*recv_size*2); start = (displs[rank] + recvcounts[rank]) * recv_size; length = buffer_sum - (displs[rank] + recvcounts[rank]) * recv_size; memset(tmprbuf + start/sizeof(int), 0, length*2); for(i = 0; i < (send_size/sizeof(int)); ++i) tmpsbuf[i] = (double)sibuf[i]; /* Switch to comm->coll_fns->fn() */ rc = MPIDO_Allreduce(MPI_IN_PLACE, tmprbuf, buffer_sum/sizeof(int), MPI_DOUBLE, MPI_SUM, comm_ptr, mpierrno); sibuf = (int *) startbuf; for(i = 0; i < (displs[rank]*recv_size/sizeof(int)); ++i) sibuf[i] = (int)tmprbuf[i]; for(i = start/sizeof(int); i < buffer_sum/sizeof(int); ++i) sibuf[i] = (int)tmprbuf[i]; MPIU_Free(tmprbuf); return rc; } direct_algo: start = 0; length = displs[rank] * recv_size; memset(startbuf + 
start, 0, length); start = (displs[rank] + recvcounts[rank]) * recv_size; length = buffer_sum - (displs[rank] + recvcounts[rank]) * recv_size; memset(startbuf + start, 0, length); TRACE_ERR("Calling MPIDO_Allreduce from MPIDO_Allgatherv_allreduce\n"); /* Switch to comm->coll_fns->fn() */ rc = MPIDO_Allreduce(MPI_IN_PLACE, startbuf, buffer_sum/sizeof(unsigned), MPI_UNSIGNED, MPI_BOR, comm_ptr, mpierrno); TRACE_ERR("Leaving MPIDO_Allgatherv_allreduce\n"); return rc; }