int MPIX_Get_accumulate_x(const void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, void *result_addr, MPI_Count result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) { int rc = MPI_SUCCESS; if (likely (origin_count <= bigmpi_int_max && result_count <= bigmpi_int_max && target_count <= bigmpi_int_max)) { rc = MPI_Get_accumulate(origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win); } else { MPI_Datatype neworigin_datatype, newresult_datatype, newtarget_datatype; MPIX_Type_contiguous_x(origin_count, origin_datatype, &neworigin_datatype); MPIX_Type_contiguous_x(result_count, result_datatype, &newresult_datatype); MPIX_Type_contiguous_x(target_count, target_datatype, &newtarget_datatype); MPI_Type_commit(&neworigin_datatype); MPI_Type_commit(&newresult_datatype); MPI_Type_commit(&newtarget_datatype); rc = MPI_Get_accumulate(origin_addr, 1, neworigin_datatype, result_addr, 1, newresult_datatype, target_rank, target_disp, 1, newtarget_datatype, op, win); MPI_Type_free(&neworigin_datatype); MPI_Type_free(&newresult_datatype); MPI_Type_free(&newtarget_datatype); } return rc; }
/*
 * Synopsis
 *
 * Large-count version of MPI_Neighbor_alltoall: sendcount and recvcount are
 * MPI_Count values that may exceed bigmpi_int_max.
 *
 * A side whose count is <= bigmpi_int_max is passed to MPI_Neighbor_alltoall
 * unchanged (count narrowed to int).  A side whose count is larger is folded
 * into a single derived datatype with MPIX_Type_contiguous_x and passed with
 * a count of 1.
 *
 * Return value
 *
 * The code returned by MPI_Neighbor_alltoall, or the code of the first
 * datatype construction/commit call that failed, in which case
 * MPI_Neighbor_alltoall is not called.
 */
int MPIX_Neighbor_alltoall_x(const void *sendbuf, MPI_Count sendcount, MPI_Datatype sendtype,
                             void *recvbuf, MPI_Count recvcount, MPI_Datatype recvtype,
                             MPI_Comm comm)
{
    int rc = MPI_SUCCESS;

    if (likely (sendcount <= bigmpi_int_max && recvcount <= bigmpi_int_max )) {
        return MPI_Neighbor_alltoall(sendbuf, (int)sendcount, sendtype,
                                     recvbuf, (int)recvcount, recvtype, comm);
    }

    /* Arguments actually handed to MPI; start from "wrapped" (count 1) and
     * fall back to the caller's count for a side that fits in an int. */
    MPI_Datatype eff_sendtype = sendtype;
    MPI_Datatype eff_recvtype = recvtype;
    int eff_sendcount = 1;
    int eff_recvcount = 1;
    /* Set once the corresponding wrapper type exists and must be freed. */
    int send_built = 0;
    int recv_built = 0;

    if (sendcount > bigmpi_int_max) {
        rc = MPIX_Type_contiguous_x(sendcount, sendtype, &eff_sendtype);
        if (rc != MPI_SUCCESS) {
            goto cleanup;
        }
        send_built = 1;
        rc = MPI_Type_commit(&eff_sendtype);
        if (rc != MPI_SUCCESS) {
            goto cleanup;
        }
    } else {
        eff_sendcount = (int)sendcount;
    }

    if (recvcount > bigmpi_int_max) {
        rc = MPIX_Type_contiguous_x(recvcount, recvtype, &eff_recvtype);
        if (rc != MPI_SUCCESS) {
            goto cleanup;
        }
        recv_built = 1;
        rc = MPI_Type_commit(&eff_recvtype);
        if (rc != MPI_SUCCESS) {
            goto cleanup;
        }
    } else {
        eff_recvcount = (int)recvcount;
    }

    rc = MPI_Neighbor_alltoall(sendbuf, eff_sendcount, eff_sendtype,
                               recvbuf, eff_recvcount, eff_recvtype, comm);

cleanup:
    if (recv_built) {
        MPI_Type_free(&eff_recvtype);
    }
    if (send_built) {
        MPI_Type_free(&eff_sendtype);
    }
    return rc;
}
/*
 * Synopsis
 *
 * Builds a datatype made of `count` blocks, where block i consists of
 * array_of_blocklengths[i] copies of oldtype placed at byte displacement
 * array_of_displacements[i].  The block lengths are MPI_Count values and
 * may therefore be larger than 32 bits.
 *
 * NOTE: despite the "hvector" in the name, the argument list (one length
 * and one byte displacement per block) mirrors MPI_Type_create_hindexed.
 *
 * int MPIX_Type_create_hvector_x(int count,
 *                                MPI_Count array_of_blocklengths[],
 *                                MPI_Aint array_of_displacements[],
 *                                MPI_Datatype oldtype,
 *                                MPI_Datatype * newtype)
 *
 * Input Parameters
 *
 *   count                   number of blocks -- also number of entries in
 *                           array_of_displacements and array_of_blocklengths
 *                           (non-negative integer)
 *
 *   array_of_blocklengths   number of elements in each block (array of
 *                           non-negative integers)
 *
 *   array_of_displacements  byte displacement of each block (array of
 *                           integers)
 *
 *   oldtype                 old datatype (handle)
 *
 * Output Parameter
 *
 *   newtype                 new datatype (handle); it is not committed here
 *
 * Return value
 *
 *   The code returned by MPI_Type_create_struct.
 *
 */
int MPIX_Type_create_hvector_x(int count,
                               MPI_Count array_of_blocklengths[],
                               MPI_Aint array_of_displacements[],
                               MPI_Datatype oldtype,
                               MPI_Datatype * newtype)
{
    int i, ret;
    MPI_Datatype *types;
    int *blocklens;

    /* The count has to fit into MPI_Aint for BigMPI to work. */
    assert(count<bigmpi_count_max);
    /* A negative count would wrap to an enormous size_t in the allocations below. */
    assert(count>=0);

    types     = malloc((size_t)count * sizeof(*types));
    blocklens = malloc((size_t)count * sizeof(*blocklens));
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * counts as an allocation failure. */
    assert(count==0 || (types!=NULL && blocklens!=NULL));

    /* Each block becomes its own contiguous type used exactly once in the struct. */
    for (i=0; i<count; i++) {
        blocklens[i] = 1;
        MPIX_Type_contiguous_x(array_of_blocklengths[i], oldtype, &(types[i]));
    }

    ret = MPI_Type_create_struct(count, blocklens, array_of_displacements, types, newtype);

    /* The per-block helper types are no longer needed once newtype exists. */
    for (i=0; i<count; i++) {
        MPI_Type_free(&(types[i]));
    }

    free(types);
    free(blocklens);

    return ret;
}
/*
 * Synopsis
 *
 * Large-count version of MPI_Rget: origin_count and target_count are
 * MPI_Count values that may exceed bigmpi_int_max.
 *
 * If both counts are <= bigmpi_int_max the call is forwarded to MPI_Rget
 * with the counts narrowed to int.  Otherwise both (count, datatype) pairs
 * are folded into derived datatypes with MPIX_Type_contiguous_x and the
 * operation is issued with a count of 1 on each side.
 *
 * Return value
 *
 * The code returned by MPI_Rget, or the code of the first datatype
 * construction/commit call that failed, in which case MPI_Rget is not
 * called and *request is left untouched.
 */
int MPIX_Rget_x(void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype,
                int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype,
                MPI_Win win, MPI_Request *request)
{
    int rc = MPI_SUCCESS;

    if (likely (origin_count <= bigmpi_int_max && target_count <= bigmpi_int_max)) {
        /* The casts are safe here: both counts were just checked against bigmpi_int_max. */
        return MPI_Rget(origin_addr, (int)origin_count, origin_datatype,
                        target_rank, target_disp, (int)target_count, target_datatype,
                        win, request);
    }

    MPI_Datatype neworigin_datatype, newtarget_datatype;

    rc = MPIX_Type_contiguous_x(origin_count, origin_datatype, &neworigin_datatype);
    if (rc != MPI_SUCCESS) {
        goto out;
    }
    rc = MPIX_Type_contiguous_x(target_count, target_datatype, &newtarget_datatype);
    if (rc != MPI_SUCCESS) {
        goto free_origin;
    }

    rc = MPI_Type_commit(&neworigin_datatype);
    if (rc == MPI_SUCCESS) {
        rc = MPI_Type_commit(&newtarget_datatype);
    }
    if (rc == MPI_SUCCESS) {
        rc = MPI_Rget(origin_addr, 1, neworigin_datatype,
                      target_rank, target_disp, 1, newtarget_datatype,
                      win, request);
    }

    /* The wrapper types are released while the request may still be pending.
     * NOTE(review): this relies on MPI_Type_free not disturbing communication
     * that already uses the type -- see the MPI standard's MPI_TYPE_FREE text. */
    MPI_Type_free(&newtarget_datatype);
free_origin:
    MPI_Type_free(&neworigin_datatype);
out:
    return rc;
}
/* * Synopsis * * void convert_vectors(..) * * Input Parameter * * int num length of all vectors (unless splat true) * int splat_old_count if non-zero, use oldcount instead of iterating over vector (v-to-w) * MPI_Count oldcount single count (ignored if splat_old_count==0) * MPI_Count oldcounts vector of counts * int splat_old_type if non-zero, use oldtype instead of iterating over vector (v-to-w) * MPI_Datatype oldtype single type (MPI_DATATYPE_NULL if splat_old_type==0) * MPI_Datatype oldtypes vector of types (NULL if splat_old_type!=0) * int zero_new_displs set the displacement to zero (scatter/gather) * MPI_Aint olddispls vector of displacements (NULL if zero_new_displs!=0) * * Output Parameters * * int newcounts * MPI_Datatype newtypes * MPI_Aint newdispls * */ void BigMPI_Convert_vectors(int num, int splat_old_count, const MPI_Count oldcount, const MPI_Count oldcounts[], int splat_old_type, const MPI_Datatype oldtype, const MPI_Datatype oldtypes[], int zero_new_displs, const MPI_Aint olddispls[], int newcounts[], MPI_Datatype newtypes[], MPI_Aint newdispls[]) { assert(splat_old_count || (oldcounts!=NULL)); assert(splat_old_type || (oldtypes!=NULL)); assert(zero_new_displs || (olddispls!=NULL)); MPI_Aint lb /* unused */, oldextent; if (splat_old_type) { MPI_Type_get_extent(oldtype, &lb, &oldextent); } else { /* !splat_old_type implies ALLTOALLW, which implies no displacement zeroing. */ assert(!zero_new_displs); } for (int i=0; i<num; i++) { /* counts */ newcounts[i] = 1; /* types */ MPIX_Type_contiguous_x(oldcounts[i], splat_old_type ? oldtype : oldtypes[i], &newtypes[i]); MPI_Type_commit(&newtypes[i]); /* displacements */ MPI_Aint newextent; /* If we are not splatting old type, it implies ALLTOALLW, * which does not scale the displacement by the type extent, * nor would we ever zero the displacements. */ if (splat_old_type) { MPI_Type_get_extent(newtypes[i], &lb, &newextent); newdispls[i] = (zero_new_displs ? 
0 : olddispls[i]*oldextent/newextent); } else { newdispls[i] = olddispls[i]; } } return; }
int main(int argc, char * argv[]) { int provided; MPI_ASSERT(MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided)); int rank, size; MPI_ASSERT(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); MPI_ASSERT(MPI_Comm_size(MPI_COMM_WORLD, &size)); int logn = (argc>1) ? atoi(argv[1]) : 32; size_t count = (size_t)1<<logn; /* explicit cast required */ printf("count = %zu \n", count ); MPI_Datatype bigtype; MPI_ASSERT(MPIX_Type_contiguous_x( (MPI_Count)count, MPI_CHAR, &bigtype)); MPI_ASSERT(MPI_Type_commit(&bigtype)); char * rbuf = NULL; char * sbuf = NULL; #ifdef USE_MPI_ALLOC_MEM MPI_ASSERT(MPI_Alloc_mem( (MPI_Aint)count * sizeof(char), MPI_INFO_NULL, &rbuf)); MPI_ASSERT(MPI_Alloc_mem( (MPI_Aint)count * sizeof(char), MPI_INFO_NULL, &sbuf)); #else rbuf = malloc( count * sizeof(char)); assert(rbuf!=NULL); sbuf = malloc( count * sizeof(char)); assert(sbuf!=NULL); #endif for (size_t i=0; i<count; i++) rbuf[i] = 'a'; for (size_t i=0; i<count; i++) sbuf[i] = 'z'; MPI_Request requests[2]; MPI_Status statuses[2]; if (rank==(size-1)) { MPI_ASSERT(MPI_Irecv(rbuf, 1, bigtype, 0, 0, MPI_COMM_WORLD, &(requests[1]) )); } if (rank==0) { MPI_ASSERT(MPI_Isend(sbuf, 1, bigtype, size-1, 0, MPI_COMM_WORLD, &(requests[0]) )); } MPI_Count ocount[2]; if (size==1) { MPI_ASSERT(MPI_Waitall(2, requests, statuses)); MPI_ASSERT(MPI_Get_elements_x( &(statuses[1]), MPI_CHAR, &(ocount[1]))); } else { if (rank==(size-1)) { MPI_ASSERT(MPI_Wait( &(requests[1]), &(statuses[1]) )); MPI_ASSERT(MPI_Get_elements_x( &(statuses[1]), MPI_CHAR, &(ocount[1]) )); } else if (rank==0) { MPI_ASSERT(MPI_Wait( &(requests[0]), &(statuses[0]) )); MPI_ASSERT(MPI_Get_elements_x( &(statuses[0]), MPI_CHAR, &(ocount[0]) )); } } if (rank==0) { printf("ocount[0] = %lld \n", ocount[0]); } else if ( rank==(size-1) ) { printf("ocount[1] = %lld \n", ocount[1]); } /* correctness check */ if (rank==(size-1)) { MPI_Count errors = 0; for (MPI_Count i=0; i<count; i++) errors += ( rbuf[i] != 'z' ); printf("errors = %lld \n", errors); } #ifdef 
USE_MPI_ALLOC_MEM MPI_ASSERT(MPI_Free_mem(rbuf)); MPI_ASSERT(MPI_Free_mem(sbuf)); #else free(rbuf); free(sbuf); #endif MPI_ASSERT(MPI_Type_free(&bigtype)); MPI_ASSERT(MPI_Finalize()); return 0; }