/* requires a contiguous/continuous buffer on root though */
/*
 * MPIDO_Scatterv_bcast - scatterv implemented as one broadcast of the whole
 * send buffer followed by a local copy of this rank's slice.
 *
 * Every rank reads sendcounts[] and displs[] (to size the temporary buffer
 * and to locate its own slice), so both arrays must be valid on all ranks,
 * not only on root.
 *
 * sendbuf    - root's source buffer; broadcast as sum(sendcounts) elements
 * sendcounts - per-rank element counts
 * displs     - per-rank displacements, in elements
 * sendtype   - datatype handed to the broadcast
 * recvbuf    - destination for this rank's slice; MPI_IN_PLACE on root
 *              skips the local copy
 * recvcount  - unused by this implementation
 * recvtype   - datatype whose size is used as the per-element byte stride
 * root       - rank that owns sendbuf
 * comm_ptr   - communicator
 * mpierrno   - forwarded to MPIDO_Bcast
 *
 * Returns the MPIDO_Bcast result, or a "**nomem" error code if the
 * temporary buffer cannot be allocated on a non-root rank.
 */
int MPIDO_Scatterv_bcast(void *sendbuf,
                         int *sendcounts,
                         int *displs,
                         MPI_Datatype sendtype,
                         void *recvbuf,
                         int recvcount,
                         MPI_Datatype recvtype,
                         int root,
                         MPID_Comm *comm_ptr,
                         int *mpierrno)
{
   const int rank = comm_ptr->rank;
   const int size = comm_ptr->local_size;
   char *tempbuf;
   int i, sum = 0, dtsize, rc = 0;

   /* Total element count to broadcast.  The test must look at each count,
    * not at the array pointer itself. */
   for (i = 0; i < size; i++)
   {
      if (sendcounts[i] > 0)
         sum += sendcounts[i];
   }

   MPIDI_Datatype_get_data_size(1, recvtype, dtsize);

   if (rank != root)
   {
      /* Size the allocation in size_t so dtsize * sum cannot overflow int. */
      tempbuf = MPIU_Malloc((size_t)dtsize * (size_t)sum);
      if (!tempbuf)
         return MPIR_Err_create_code(MPI_SUCCESS,
                                     MPIR_ERR_RECOVERABLE,
                                     __FUNCTION__,
                                     __LINE__,
                                     MPI_ERR_OTHER,
                                     "**nomem", 0);
   }
   else
      tempbuf = sendbuf;

   /* Switch to comm->coll_fns->fn() */
   rc = MPIDO_Bcast(tempbuf, sum, sendtype, root, comm_ptr, mpierrno);

   /* Root's slice is already in place; root never allocated tempbuf. */
   if (rank == root && recvbuf == MPI_IN_PLACE)
      return rc;

   /* displs[] is expressed in elements while tempbuf is a byte pointer, so
    * the displacement has to be scaled by the element size.  This relies on
    * the contiguous-buffer requirement stated above (size == stride). */
   memcpy(recvbuf,
          tempbuf + (size_t)displs[rank] * (size_t)dtsize,
          (size_t)sendcounts[rank] * (size_t)dtsize);

   if (rank != root)
      MPIU_Free(tempbuf);

   return rc;
}
/* ****************************************************************** */
/*
 * MPIDO_Allgather_bcast - allgather implemented as one broadcast per rank.
 *
 * Unless sendbuf is MPI_IN_PLACE, the local contribution is first copied
 * into this rank's slot of recvbuf.  Then, for every rank i, slot i of
 * recvbuf is broadcast with i as root.
 *
 * send_true_lb, recv_true_lb, send_size and recv_size are accepted for
 * signature compatibility and are not used here.
 *
 * Returns the first non-success code seen (local copy or any broadcast),
 * otherwise the result of the last broadcast.  All broadcasts are still
 * issued after a failure so that every rank makes the same sequence of
 * collective calls.
 */
int MPIDO_Allgather_bcast(const void *sendbuf,
                          int sendcount,
                          MPI_Datatype sendtype,
                          void *recvbuf,
                          int recvcount,
                          MPI_Datatype recvtype,
                          MPI_Aint send_true_lb,
                          MPI_Aint recv_true_lb,
                          size_t send_size,
                          size_t recv_size,
                          MPID_Comm * comm_ptr,
                          int *mpierrno)
{
  int i, np, rc = 0;
  int first_err = MPI_SUCCESS;
  MPI_Aint extent;
  const int rank = comm_ptr->rank;

  np = comm_ptr ->local_size;
  MPID_Datatype_get_extent_macro(recvtype, extent);

  MPIU_Ensure_Aint_fits_in_pointer ((MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                     np * recvcount * extent));

  if (sendbuf != MPI_IN_PLACE)
  {
    /* Byte-pointer arithmetic, widened to MPI_Aint before multiplying so
     * rank * recvcount cannot overflow int. */
    void *destbuf = (char *)recvbuf + (MPI_Aint)rank * recvcount * extent;
    int copy_rc = MPIR_Localcopy(sendbuf,
                                 sendcount,
                                 sendtype,
                                 destbuf,
                                 recvcount,
                                 recvtype);
    if (copy_rc != MPI_SUCCESS)
      first_err = copy_rc;
  }

  /* Remember the first failure instead of letting a later successful
   * broadcast overwrite it. */
  for (i = 0; i < np; i++)
  {
    void *destbuf = (char *)recvbuf + (MPI_Aint)i * recvcount * extent;
    /* Switch to comm->coll_fns->fn() */
    rc = MPIDO_Bcast(destbuf,
                     recvcount,
                     recvtype,
                     i,
                     comm_ptr,
                     mpierrno);
    if (rc != MPI_SUCCESS && first_err == MPI_SUCCESS)
      first_err = rc;
  }

  return (first_err != MPI_SUCCESS) ? first_err : rc;
}
/* ****************************************************************** */
/*
 * MPIDO_Allgatherv_bcast - allgatherv implemented as one broadcast per rank.
 *
 * Unless sendbuf is MPI_IN_PLACE, the local contribution is first copied to
 * recvbuf at displs[rank] (in units of recvtype extent).  Then, for every
 * rank i, recvcounts[i] elements at displs[i] are broadcast with i as root.
 *
 * buffer_sum, send_true_lb, recv_true_lb, send_size and recv_size are
 * accepted for signature compatibility and are not used here.
 *
 * Returns the first non-success code seen (local copy or any broadcast),
 * otherwise the result of the last broadcast; MPI_ERR_INTERN if no
 * broadcast was issued at all.  All broadcasts are still issued after a
 * failure so that every rank makes the same sequence of collective calls.
 */
int MPIDO_Allgatherv_bcast(const void *sendbuf,
                           int sendcount,
                           MPI_Datatype sendtype,
                           void *recvbuf,
                           const int *recvcounts,
                           int buffer_sum,
                           const int *displs,
                           MPI_Datatype recvtype,
                           MPI_Aint send_true_lb,
                           MPI_Aint recv_true_lb,
                           size_t send_size,
                           size_t recv_size,
                           MPID_Comm * comm_ptr,
                           int *mpierrno)
{
   const int rank = comm_ptr->rank;
   TRACE_ERR("Entering MPIDO_Allgatherv_bcast\n");
   int i, rc=MPI_ERR_INTERN;
   int first_err = MPI_SUCCESS;
   MPI_Aint extent;
   MPID_Datatype_get_extent_macro(recvtype, extent);

   if (sendbuf != MPI_IN_PLACE)
   {
      /* Explicit byte pointer instead of arithmetic on void *. */
      void *destbuffer = (char *)recvbuf + (MPI_Aint)displs[rank] * extent;
      int copy_rc = MPIR_Localcopy(sendbuf,
                                   sendcount,
                                   sendtype,
                                   destbuffer,
                                   recvcounts[rank],
                                   recvtype);
      if (copy_rc != MPI_SUCCESS)
         first_err = copy_rc;
   }

   TRACE_ERR("Calling MPIDO_Bcasts in MPIDO_Allgatherv_bcast\n");
   /* Remember the first failure instead of letting a later successful
    * broadcast overwrite it. */
   for (i = 0; i < comm_ptr->local_size; i++)
   {
      void *destbuffer = (char *)recvbuf + (MPI_Aint)displs[i] * extent;
      /* Switch to comm->coll_fns->fn() */
      rc = MPIDO_Bcast(destbuffer,
                       recvcounts[i],
                       recvtype,
                       i,
                       comm_ptr,
                       mpierrno);
      if (rc != MPI_SUCCESS && first_err == MPI_SUCCESS)
         first_err = rc;
   }

   TRACE_ERR("Leaving MPIDO_Allgatherv_bcast\n");
   return (first_err != MPI_SUCCESS) ? first_err : rc;
}
/*
 * MPIDO_Bcast - broadcast count elements of datatype from root over comm_ptr.
 *
 * Flow:
 *   1. Payloads larger than BCAST_LIMIT bytes are split into chunks that
 *      are broadcast by recursive calls.
 *   2. If the user selected MPICH, or the payload is empty, defer to
 *      MPIR_Bcast_intra.
 *   3. Non-contiguous datatypes are packed into a temporary byte buffer on
 *      root and unpacked on the other ranks after the transfer.
 *   4. A PAMI broadcast protocol is chosen (pre-selected "optimized"
 *      protocols by cutoff size, or the user-specified one), optionally
 *      validated against its metadata, then posted and waited on.
 *   5. If validation rejects the protocol, or the FCA cutoff is exceeded,
 *      fall back to MPIR_Bcast_intra.
 *
 * buffer   - data to send (root) / receive (other ranks)
 * count    - number of datatype elements
 * datatype - element datatype
 * root     - rank of the broadcasting process
 * comm_ptr - communicator
 * mpierrno - forwarded to MPIR_Bcast_intra and recursive calls
 *
 * Returns 0 after a completed PAMI broadcast; otherwise whatever the
 * fallback or recursive call returns.
 */
int MPIDO_Bcast(void *buffer,
                int count,
                MPI_Datatype datatype,
                int root,
                MPID_Comm *comm_ptr,
                int *mpierrno)
{
   TRACE_ERR("in mpido_bcast\n");
   const size_t BCAST_LIMIT = 0x40000000;
   int data_contig, rc;
   void *data_buffer = NULL,
        *noncontig_buff = NULL;
   volatile unsigned active = 1;
   MPI_Aint data_true_lb = 0;
   MPID_Datatype *data_ptr;
   MPID_Segment segment;
   MPIDI_Post_coll_t bcast_post;
   const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
   const int rank = comm_ptr->rank;
#if ASSERT_LEVEL==0
   /* We can't afford the tracing in ndebug/performance libraries */
   const unsigned verbose = 0;
#else
   const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0);
#endif
   const int selected_type = mpid->user_selected_type[PAMI_XFER_BROADCAST];

   /* Must calculate data_size based on count=1 in case it's total size is > integer */
   int data_size_one;
   MPIDI_Datatype_get_info(1, datatype,
                           data_contig, data_size_one, data_ptr, data_true_lb);
   /* do this calculation once and use twice */
   const size_t data_size_sz = (size_t)data_size_one*(size_t)count;
   if(unlikely(verbose))
      fprintf(stderr,"bcast count %d, size %d (%#zX), root %d, buffer %p\n",
              count,data_size_one, (size_t)data_size_one*(size_t)count, root,buffer);

   if(unlikely( data_size_sz > BCAST_LIMIT) )
   {
      /* Too large for a single transfer: broadcast new_count elements at a
       * time, then the remainder.  data_size_one is non-zero here because
       * data_size_sz exceeded the limit. */
      void *new_buffer=buffer;
      int c, new_count = (int)BCAST_LIMIT/data_size_one;
      MPID_assert(new_count > 0);

      for(c=1; ((size_t)c*(size_t)new_count) <= (size_t)count; ++c)
      {
         if ((rc = MPIDO_Bcast(new_buffer,
                               new_count,
                               datatype,
                               root,
                               comm_ptr,
                               mpierrno)) != MPI_SUCCESS)
            return rc;
         /* NOTE(review): advances by data size, not extent - presumably only
          * correct for contiguous datatypes; confirm. */
         new_buffer = (char*)new_buffer + (size_t)data_size_one*(size_t)new_count;
      }
      new_count = count % new_count; /* 0 is ok, just returns no-op */
      return MPIDO_Bcast(new_buffer,
                         new_count,
                         datatype,
                         root,
                         comm_ptr,
                         mpierrno);
   }

   /* Must use data_size based on count for byte bcast processing.
      Previously calculated as a size_t but large data_sizes were
      handled above so this cast to int should be fine here.
   */
   const int data_size = (int)data_size_sz;

   if(selected_type == MPID_COLL_USE_MPICH || data_size == 0)
   {
      if(unlikely(verbose))
         fprintf(stderr,"Using MPICH bcast algorithm\n");
      MPIDI_Update_last_algorithm(comm_ptr,"BCAST_MPICH");
      return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
   }

   data_buffer = (char *)buffer + data_true_lb;

   if(!data_contig)
   {
      /* The transfer is done in bytes, so non-contiguous data goes through
       * a packed staging buffer. */
      noncontig_buff = MPIU_Malloc(data_size);
      data_buffer = noncontig_buff;
      if(noncontig_buff == NULL)
      {
         MPID_Abort(NULL, MPI_ERR_NO_SPACE, 1,
                    "Fatal: Cannot allocate pack buffer");
      }
      if(rank == root)
      {
         DLOOP_Offset last = data_size;
         MPID_Segment_init(buffer, count, datatype, &segment, 0);
         MPID_Segment_pack(&segment, 0, &last, noncontig_buff);
      }
   }

   pami_xfer_t bcast;
   pami_algorithm_t my_bcast;
   const pami_metadata_t *my_md = (pami_metadata_t *)NULL;
   int queryreq = 0;

   bcast.cb_done = cb_bcast;
   bcast.cookie = (void *)&active;
   bcast.cmd.xfer_broadcast.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0);
   bcast.algorithm = mpid->user_selected[PAMI_XFER_BROADCAST];
   bcast.cmd.xfer_broadcast.buf = data_buffer;
   bcast.cmd.xfer_broadcast.type = PAMI_TYPE_BYTE;
   /* Needs to be sizeof(type)*count since we are using bytes as
    * the generic type */
   bcast.cmd.xfer_broadcast.typecount = data_size;

   if(selected_type == MPID_COLL_OPTIMIZED)
   {
      TRACE_ERR("Optimized bcast (%s) and (%s) were pre-selected\n",
                mpid->opt_protocol_md[PAMI_XFER_BROADCAST][0].name,
                mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1].name);

      if(mpid->cutoff_size[PAMI_XFER_BROADCAST][1] != 0)/* SSS: There is FCA cutoff (FCA only sets cutoff for [PAMI_XFER_BROADCAST][1]) */
      {
         if(data_size <= mpid->cutoff_size[PAMI_XFER_BROADCAST][1])
         {
            my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][1];
            my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1];
            queryreq = mpid->must_query[PAMI_XFER_BROADCAST][1];
         }
         else
         {
            /* Falling back to MPICH: the pack buffer is no longer needed,
             * release it so this path does not leak. */
            if(noncontig_buff != NULL)
               MPIU_Free(noncontig_buff);
            return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
         }
      }

      /* NOTE(review): this selection also runs after the FCA branch above
       * and overwrites whatever it chose - confirm that is intended. */
      if(data_size > mpid->cutoff_size[PAMI_XFER_BROADCAST][0])
      {
         my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][1];
         my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][1];
         queryreq = mpid->must_query[PAMI_XFER_BROADCAST][1];
      }
      else
      {
         my_bcast = mpid->opt_protocol[PAMI_XFER_BROADCAST][0];
         my_md = &mpid->opt_protocol_md[PAMI_XFER_BROADCAST][0];
         queryreq = mpid->must_query[PAMI_XFER_BROADCAST][0];
      }
   }
   else
   {
      TRACE_ERR("Bcast (%s) was specified by user\n",
                mpid->user_metadata[PAMI_XFER_BROADCAST].name);
      my_bcast = mpid->user_selected[PAMI_XFER_BROADCAST];
      my_md = &mpid->user_metadata[PAMI_XFER_BROADCAST];
      queryreq = selected_type;
   }

   bcast.algorithm = my_bcast;

   if(unlikely(queryreq == MPID_COLL_ALWAYS_QUERY ||
               queryreq == MPID_COLL_CHECK_FN_REQUIRED))
   {
      metadata_result_t result = {0};
      TRACE_ERR("querying bcast protocol %s, type was: %d\n",
                my_md->name, queryreq);
      if(my_md->check_fn != NULL) /* calling the check fn is sufficient */
      {
         /* Store into the outer 'result' so the bitmask test below actually
          * sees the check_fn verdict (a shadowing local used to hide it). */
         result = my_md->check_fn(&bcast);
         result.check.nonlocal = 0; /* #warning REMOVE THIS WHEN IMPLEMENTED */
      }
      else /* no check_fn, manually look at the metadata fields */
      {
         TRACE_ERR("Optimzed selection line %d\n",__LINE__);
         /* Check if the message range if restricted */
         if(my_md->check_correct.values.rangeminmax)
         {
            if((my_md->range_lo <= data_size) &&
               (my_md->range_hi >= data_size))
               ; /* ok, algorithm selected */
            else
            {
               result.check.range = 1;
               if(unlikely(verbose))
               {
                  fprintf(stderr,"message size (%u) outside range (%zu<->%zu) for %s.\n",
                          data_size,
                          my_md->range_lo,
                          my_md->range_hi,
                          my_md->name);
               }
            }
         }
         /* \todo check the rest of the metadata */
      }
      TRACE_ERR("bitmask: %#X\n", result.bitmask);
      if(result.bitmask)
      {
         if(unlikely(verbose))
            fprintf(stderr,"Using MPICH bcast algorithm - query fn failed\n");
         MPIDI_Update_last_algorithm(comm_ptr,"BCAST_MPICH");
         /* Release the pack buffer before handing off to MPICH. */
         if(noncontig_buff != NULL)
            MPIU_Free(noncontig_buff);
         return MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, mpierrno);
      }
      /* Protocols flagged asyncflowctl get a barrier each time the
       * per-communicator request counter reaches zero. */
      if(my_md->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
      {
         comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
         int tmpmpierrno;
         if(unlikely(verbose))
            fprintf(stderr,"Query barrier required for %s\n", my_md->name);
         MPIDO_Barrier(comm_ptr, &tmpmpierrno);
      }
   }

   if(unlikely(verbose))
   {
      unsigned long long int threadID;
      MPIU_Thread_id_t tid;
      MPIU_Thread_self(&tid);
      threadID = (unsigned long long int)tid;
      fprintf(stderr,"<%llx> Using protocol %s for bcast on %u\n",
              threadID,
              my_md->name,
              (unsigned) comm_ptr->context_id);
   }

   MPIDI_Context_post(MPIDI_Context[0], &bcast_post.state,
                      MPIDI_Pami_post_wrapper, (void *)&bcast);
   MPIDI_Update_last_algorithm(comm_ptr, my_md->name);
   /* 'active' is handed to the transfer as its cookie; wait until it is
    * cleared. */
   MPID_PROGRESS_WAIT_WHILE(active);
   TRACE_ERR("bcast done\n");

   if(!data_contig)
   {
      /* Non-root ranks unpack the received bytes into the user's layout. */
      if(rank != root)
         MPIR_Localcopy(noncontig_buff, data_size, MPI_CHAR,
                        buffer,         count,     datatype);
      MPIU_Free(noncontig_buff);
   }

   TRACE_ERR("leaving bcast\n");
   return 0;
}