/*@C
   PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

   Not Collective

   Input Arguments:
+  sf - star forest
-  unit - data type for each node

   Output Arguments:
+  localtypes - types describing part of local leaf buffer referencing each remote rank
-  remotetypes - types describing part of remote root buffer referenced for each remote rank

   Level: developer

.seealso: PetscSFSetGraph(), PetscSFView()
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window*)sf->data;
  PetscErrorCode    ierr;
  PetscSFDataLink   link;
  PetscInt          i,nranks;
  const PetscInt    *roffset,*rmine,*rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link=w->link; link; link=link->next) {
    PetscBool match;
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(0);
    }
  }

  /* Create new composite types for each send rank */
  ierr = PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(*link),&link);CHKERRQ(ierr);
  ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr);
  ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
    PetscMPIInt           *rmine,*rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    PetscInt j;
    ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
    for (j=0; j<rcount; j++) {
      ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
      ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
    }
#endif
    ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr);
    ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr);
#if defined(PETSC_USE_64BIT_INDICES)
    ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
#endif
    ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr);
  }

  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(0);
}
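/* A minimal standalone sketch (not part of PETSc; buffer size and index
 * values are illustrative assumptions) of the pattern the routine above
 * implements: one MPI_Type_create_indexed_block() per remote rank, so a
 * single one-sided operation with count 1 can gather/scatter exactly the
 * non-contiguous entries belonging to that rank. */
#include <mpi.h>

int main(int argc, char **argv)
{
    double buf[8] = { 0 };
    int    idx[3] = { 1, 4, 6 };  /* leaf entries referencing one remote rank */
    MPI_Datatype ranktype;

    MPI_Init(&argc, &argv);
    /* blocklength 1: pick out individual units, as for link->mine[i] above */
    MPI_Type_create_indexed_block(3, 1, idx, MPI_DOUBLE, &ranktype);
    MPI_Type_commit(&ranktype);
    /* ranktype now describes buf[1], buf[4], buf[6]; a single
       MPI_Put(buf, 1, ranktype, ...) would move exactly those entries */
    MPI_Type_free(&ranktype);
    MPI_Finalize();
    return 0;
}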
/*
 * Setup indexed-block type info and handlers.
 *
 * An indexed-block datatype is created by using the following parameters.
 * nblock:   Number of blocks.
 * blocklen: Number of elements in each block.
 * stride:   Strided number of elements between two adjacent blocks.  The
 *           displacement of each block is set as (index of current block * stride).
 * lb:       Lower bound of the new datatype.
 * oldtype:  Datatype of each element.
 */
static int MTestTypeIndexedBlockCreate(MPI_Aint nblock, MPI_Aint blocklen, MPI_Aint stride,
                                       MPI_Aint lb, MPI_Datatype oldtype,
                                       const char *typename_prefix, MTestDatatype * mtype)
{
    int merr;
    char type_name[128];
    int i;

    MTestTypeReset(mtype);

    merr = MPI_Type_size(oldtype, &mtype->basesize);
    if (merr)
        MTestPrintError(merr);

    mtype->displs = (int *) malloc(nblock * sizeof(int));
    mtype->displ_in_bytes = (MPI_Aint *) malloc(nblock * sizeof(MPI_Aint));
    if (!mtype->displs || !mtype->displ_in_bytes) {
        char errmsg[128] = { 0 };
        sprintf(errmsg, "Out of memory in %s", __FUNCTION__);
        MTestError(errmsg);
    }

    mtype->nblock = nblock;
    mtype->blksize = blocklen * mtype->basesize;
    for (i = 0; i < nblock; i++) {
        mtype->displs[i] = lb + stride * i;
        mtype->displ_in_bytes[i] = (lb + stride * i) * mtype->basesize;
    }

    /* Indexed-block uses displacements in units of oldtype */
    merr = MPI_Type_create_indexed_block(nblock, blocklen, mtype->displs, oldtype,
                                         &mtype->datatype);
    if (merr)
        MTestPrintError(merr);
    merr = MPI_Type_commit(&mtype->datatype);
    if (merr)
        MTestPrintError(merr);

    memset(type_name, 0, sizeof(type_name));
    sprintf(type_name, "%s %s (%ld nblock %ld blocklen %ld stride %ld lb)",
            typename_prefix, "index_block", nblock, blocklen, stride, lb);
    merr = MPI_Type_set_name(mtype->datatype, (char *) type_name);
    if (merr)
        MTestPrintError(merr);

    mtype->InitBuf = MTestTypeIndexedBlockInit;
    mtype->FreeBuf = MTestTypeFree;
    mtype->CheckBuf = MTestTypeIndexedBlockCheckbuf;
    return merr;
}
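/* A hedged standalone aside (values assumed, not from the test suite): for
 * the regular layout the helper above builds, displs[i] = lb + i*stride with
 * a constant blocklen, MPI_Type_create_indexed_block describes the same data
 * as MPI_Type_vector; the check below compares their sizes to illustrate
 * the equivalence. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int nblock = 4, blocklen = 2, stride = 5, i;
    int displs[4];
    int size_ib, size_vec;
    MPI_Datatype ib, vec;

    MPI_Init(&argc, &argv);

    for (i = 0; i < nblock; i++)
        displs[i] = i * stride; /* lb = 0, as in the helper's default case */

    MPI_Type_create_indexed_block(nblock, blocklen, displs, MPI_INT, &ib);
    MPI_Type_vector(nblock, blocklen, stride, MPI_INT, &vec);
    MPI_Type_commit(&ib);
    MPI_Type_commit(&vec);

    MPI_Type_size(ib, &size_ib);
    MPI_Type_size(vec, &size_vec);
    printf("indexed_block size=%d, vector size=%d (both hold %d ints)\n",
           size_ib, size_vec, nblock * blocklen);

    MPI_Type_free(&ib);
    MPI_Type_free(&vec);
    MPI_Finalize();
    return 0;
}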
void mpi_type_create_indexed_block_f(MPI_Fint *count, MPI_Fint *blocklength,
                                     MPI_Fint *array_of_displacements,
                                     MPI_Fint *oldtype, MPI_Fint *newtype,
                                     MPI_Fint *ierr)
{
    MPI_Datatype c_old = MPI_Type_f2c(*oldtype);
    MPI_Datatype c_new;
    OMPI_ARRAY_NAME_DECL(array_of_displacements);

    OMPI_ARRAY_FINT_2_INT(array_of_displacements, *count);

    *ierr = OMPI_INT_2_FINT(
        MPI_Type_create_indexed_block(OMPI_FINT_2_INT(*count),
                                      OMPI_FINT_2_INT(*blocklength),
                                      OMPI_ARRAY_NAME_CONVERT(array_of_displacements),
                                      c_old, &c_new));

    if (MPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) {
        *newtype = MPI_Type_c2f(c_new);
    }

    OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_displacements);
}
int main(int argc, char *argv[])
{
    int errs = 0;
    int rank, size, dest, source;
    int i, indices[40];
    MPI_Aint extent;
    int *buf, *bufs[MAX_MSGS];
    MPI_Comm comm;
    MPI_Datatype dtype;
    MPI_Request req[MAX_MSGS];

    MTest_Init(&argc, &argv);
    comm = MPI_COMM_WORLD;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    source = 0;
    dest = size - 1;

    /* Setup by creating a blocked datatype that is likely to be processed
       in a piecemeal fashion */
    for (i = 0; i < 30; i++) {
        indices[i] = i * 40;
    }

    /* 30 blocks of size 10 */
    MPI_Type_create_indexed_block(30, 10, indices, MPI_INT, &dtype);
    MPI_Type_commit(&dtype);

    /* Create the corresponding message buffers */
    MPI_Type_extent(dtype, &extent);
    for (i = 0; i < MAX_MSGS; i++) {
        bufs[i] = (int *) malloc(extent);
        if (!bufs[i]) {
            fprintf(stderr, "Unable to allocate buffer %d of size %ld\n",
                    i, (long) extent);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }
    buf = (int *) malloc(10 * 30 * sizeof(int));

    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == dest) {
        MTestSleep(2);
        for (i = 0; i < MAX_MSGS; i++) {
            MPI_Recv(buf, 10 * 30, MPI_INT, source, i, comm, MPI_STATUS_IGNORE);
        }
    }
    else if (rank == source) {
        for (i = 0; i < MAX_MSGS; i++) {
            MPI_Isend(bufs[i], 1, dtype, dest, i, comm, &req[i]);
        }
        MPI_Waitall(MAX_MSGS, req, MPI_STATUSES_IGNORE);
    }
    MPI_Type_free(&dtype);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
/* blockindexed_contig_test()
 *
 * Tests behavior with a blockindexed that can be converted to a
 * contig easily.  This is specifically for coverage.
 *
 * Returns the number of errors encountered.
 */
int blockindexed_contig_test(void)
{
    int buf[4] = { 7, -1, -2, -3 };
    int err, errs = 0;

    int i, count = 1;
    int disp = 0;
    MPI_Datatype newtype;

    int size, int_size;
    MPI_Aint extent;

    err = MPI_Type_create_indexed_block(count, 1, &disp, MPI_INT, &newtype);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error creating blockindexed type in blockindexed_contig_test()\n");
        }
        errs++;
    }

    MPI_Type_size(MPI_INT, &int_size);
    err = MPI_Type_size(newtype, &size);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error obtaining type size in blockindexed_contig_test()\n");
        }
        errs++;
    }

    if (size != int_size) {
        if (verbose) {
            fprintf(stderr, "error: size != int_size in blockindexed_contig_test()\n");
        }
        errs++;
    }

    err = MPI_Type_extent(newtype, &extent);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error obtaining type extent in blockindexed_contig_test()\n");
        }
        errs++;
    }

    if (extent != int_size) {
        if (verbose) {
            fprintf(stderr, "error: extent != int_size in blockindexed_contig_test()\n");
        }
        errs++;
    }

    MPI_Type_commit(&newtype);
    err = pack_and_unpack((char *) buf, 1, newtype, 4 * sizeof(int));
    if (err != 0) {
        if (verbose) {
            fprintf(stderr, "error packing/unpacking in blockindexed_contig_test()\n");
        }
        errs += err;
    }

    for (i = 0; i < 4; i++) {
        int goodval;
        switch (i) {
            case 0:
                goodval = 7;
                break;
            default:
                goodval = 0;    /* pack_and_unpack() zeros before unpack */
                break;
        }
        if (buf[i] != goodval) {
            errs++;
            if (verbose)
                fprintf(stderr, "buf[%d] = %d; should be %d\n", i, buf[i], goodval);
        }
    }

    MPI_Type_free(&newtype);
    return errs;
}
/* blockindexed_vector_test()
 *
 * Tests behavior with a blockindexed of some vector types;
 * this shouldn't be easily convertible into anything else.
 *
 * Returns the number of errors encountered.
 */
int blockindexed_vector_test(void)
{
#define NELT (18)
    int buf[NELT] = { -1, -1, -1,
                       1, -2,  2,
                      -3, -3, -3,
                      -4, -4, -4,
                       3, -5,  4,
                       5, -6,  6 };
    int expected[NELT] = { 0, 0, 0,
                           1, 0, 2,
                           0, 0, 0,
                           0, 0, 0,
                           3, 0, 4,
                           5, 0, 6 };
    int err, errs = 0;

    int i, count = 3;
    int disp[] = { 1, 4, 5 };
    MPI_Datatype vectype, newtype;

    int size, int_size;

    /* create a vector type of 2 ints, skipping one in between */
    err = MPI_Type_vector(2, 1, 2, MPI_INT, &vectype);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error creating vector type in blockindexed_vector_test()\n");
        }
        errs++;
    }

    err = MPI_Type_create_indexed_block(count, 1, disp, vectype, &newtype);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error creating blockindexed type in blockindexed_vector_test()\n");
        }
        errs++;
    }

    MPI_Type_size(MPI_INT, &int_size);
    err = MPI_Type_size(newtype, &size);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error obtaining type size in blockindexed_vector_test()\n");
        }
        errs++;
    }

    if (size != 6 * int_size) {
        if (verbose) {
            fprintf(stderr, "error: size != 6 * int_size in blockindexed_vector_test()\n");
        }
        errs++;
    }

    MPI_Type_commit(&newtype);
    err = pack_and_unpack((char *) buf, 1, newtype, NELT * sizeof(int));
    if (err != 0) {
        if (verbose) {
            fprintf(stderr, "error packing/unpacking in blockindexed_vector_test()\n");
        }
        errs += err;
    }

    for (i = 0; i < NELT; i++) {
        if (buf[i] != expected[i]) {
            errs++;
            if (verbose)
                fprintf(stderr, "buf[%d] = %d; should be %d\n", i, buf[i], expected[i]);
        }
    }

    MPI_Type_free(&vectype);
    MPI_Type_free(&newtype);
    return errs;
}
int main(int argc, char **argv)
{
    int i, j, rank, nranks, peer, bufsize, errs;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    for (i = 0; i < XDIM * YDIM; i++) {
        *(win_buf + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank + 1) % nranks;

    /* Perform ITERATIONS strided put operations */
    for (i = 0; i < ITERATIONS; i++) {
        MPI_Aint idx_loc[SUB_YDIM];
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (j = 0; j < SUB_YDIM; j++) {
            MPI_Get_address(&src_buf[j * XDIM], &idx_loc[j]);
            idx_rem[j] = j * XDIM * sizeof(double);
            blk_len[j] = SUB_XDIM * sizeof(double);
        }

        MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type);
        MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM * sizeof(double), idx_rem,
                                      MPI_BYTE, &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Put(MPI_BOTTOM, 1, src_type, peer, 0, 1, dst_type, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errs = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual = *(win_buf + i + j * XDIM);
            const double expected = (1.0 + ((rank + nranks - 1) % nranks));
            if (actual - expected > 1e-10) {
                SQUELCH(printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                               rank, j, i, expected, actual););
                errs++;
                fflush(stdout);
            }
        }
    }
    MPI_Win_unlock(rank, buf_win);

    /* clean up */
    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);
    MPI_Free_mem(dst_buf);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int rank, nprocs, i, *counter_mem, *get_array, *get_idx, *acc_idx;
    int mask, nlevels, level, idx, tmp_rank, pof2;
    MPI_Datatype get_type, acc_type;
    MPI_Win win;
    int errs = 0, *results, *counter_vals;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        /* allocate counter memory and initialize to 0 */
        /* find the next power-of-two >= nprocs */
        pof2 = 1;
        while (pof2 < nprocs)
            pof2 *= 2;

        /* counter_mem = (int *) calloc(pof2*2, sizeof(int)); */
        i = MPI_Alloc_mem(pof2 * 2 * sizeof(int), MPI_INFO_NULL, &counter_mem);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        for (i = 0; i < (pof2 * 2); i++)
            counter_mem[i] = 0;

        MPI_Win_create(counter_mem, pof2 * 2 * sizeof(int), sizeof(int),
                       MPI_INFO_NULL, MPI_COMM_WORLD, &win);
        MPI_Win_free(&win);

        /* free(counter_mem) */
        MPI_Free_mem(counter_mem);

        /* gather the results from other processes, sort them, and check
           whether they represent a counter being incremented by 1 */
        results = (int *) malloc(NTIMES * nprocs * sizeof(int));
        for (i = 0; i < NTIMES * nprocs; i++)
            results[i] = -1;

        MPI_Gather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, results, NTIMES, MPI_INT,
                   0, MPI_COMM_WORLD);

        qsort(results + NTIMES, NTIMES * (nprocs - 1), sizeof(int), compar);

        for (i = NTIMES + 1; i < (NTIMES * nprocs); i++)
            if (results[i] != results[i - 1] + 1)
                errs++;

        free(results);
    }
    else {
        /* Get the largest power of two smaller than nprocs */
        mask = 1;
        nlevels = 0;
        while (mask < nprocs) {
            mask <<= 1;
            nlevels++;
        }
        mask >>= 1;

        get_array = (int *) malloc(nlevels * sizeof(int));
        get_idx = (int *) malloc(nlevels * sizeof(int));
        acc_idx = (int *) malloc(nlevels * sizeof(int));

        level = 0;
        idx = 0;
        tmp_rank = rank;
        while (mask >= 1) {
            if (tmp_rank < mask) {
                /* go to left for acc_idx, go to right for get_idx.
                   set idx=acc_idx for next iteration */
                acc_idx[level] = idx + 1;
                get_idx[level] = idx + mask * 2;
                idx = idx + 1;
            }
            else {
                /* go to right for acc_idx, go to left for get_idx.
                   set idx=acc_idx for next iteration */
                acc_idx[level] = idx + mask * 2;
                get_idx[level] = idx + 1;
                idx = idx + mask * 2;
            }
            level++;
            tmp_rank = tmp_rank % mask;
            mask >>= 1;
        }

        /* for (i=0; i<nlevels; i++)
           printf("Rank %d, acc_idx[%d]=%d, get_idx[%d]=%d\n",
                  rank, i, acc_idx[i], i, get_idx[i]); */

        MPI_Type_create_indexed_block(nlevels, 1, get_idx, MPI_INT, &get_type);
        MPI_Type_create_indexed_block(nlevels, 1, acc_idx, MPI_INT, &acc_type);
        MPI_Type_commit(&get_type);
        MPI_Type_commit(&acc_type);

        /* allocate array to store the values obtained from the
           fetch-and-add counter */
        counter_vals = (int *) malloc(NTIMES * sizeof(int));

        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        for (i = 0; i < NTIMES; i++) {
            Get_nextval_tree(win, get_array, get_type, acc_type, nlevels,
                             counter_vals + i);
            /* printf("Rank %d, counter %d\n", rank, value); */
        }

        MPI_Win_free(&win);

        free(get_array);
        free(get_idx);
        free(acc_idx);
        MPI_Type_free(&get_type);
        MPI_Type_free(&acc_type);

        /* gather the results to the root */
        MPI_Gather(counter_vals, NTIMES, MPI_INT, NULL, 0, MPI_DATATYPE_NULL,
                   0, MPI_COMM_WORLD);
        free(counter_vals);
    }

    MTest_Finalize(errs);
    MPI_Finalize();
    return MTestReturnValue(errs);
}
/* blockindexed_test()
 *
 * Tests behavior with a zero-count blockindexed.
 *
 * Returns the number of errors encountered.
 */
int blockindexed_test(void)
{
    int err, errs = 0;

    int count = 0;
    MPI_Datatype newtype;

    int size;
    MPI_Aint extent;

    err = MPI_Type_create_indexed_block(count, 0, (int *) 0, MPI_INT, &newtype);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error creating blockindexed type in blockindexed_test()\n");
        }
        errs++;
    }

    err = MPI_Type_size(newtype, &size);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error obtaining type size in blockindexed_test()\n");
        }
        errs++;
    }

    if (size != 0) {
        if (verbose) {
            fprintf(stderr, "error: size != 0 in blockindexed_test()\n");
        }
        errs++;
    }

    err = MPI_Type_extent(newtype, &extent);
    if (err != MPI_SUCCESS) {
        if (verbose) {
            fprintf(stderr, "error obtaining type extent in blockindexed_test()\n");
        }
        errs++;
    }

    if (extent != 0) {
        if (verbose) {
            fprintf(stderr, "error: extent != 0 in blockindexed_test()\n");
        }
        errs++;
    }

    MPI_Type_free(&newtype);
    return errs;
}
int TestIndexPackDouble(int n, int stride,
                        double *avgTimeUser, double *avgTimeMPI,
                        double *dest, const double *src)
{
    double *restrict d_dest;
    const double *restrict d_src;
    register int i, j;
    int rep, position;
    int *restrict displs = 0;
    double t1, t2, t[NTRIALS];
    MPI_Datatype indextype;

    displs = (int *) malloc(n * sizeof(int));
    for (i = 0; i < n; i++)
        displs[i] = i * stride;

    /* User code */
    if (verbose)
        printf("TestIndexPackDouble (USER): ");
    for (j = 0; j < NTRIALS; j++) {
        t1 = MPI_Wtime();
        for (rep = 0; rep < N_REPS; rep++) {
            i = n;
            d_dest = dest;
            d_src = src;
            for (i = 0; i < n; i++) {
                *d_dest++ = d_src[displs[i]];
            }
        }
        t2 = MPI_Wtime() - t1;
        t[j] = t2;
        if (verbose)
            printf("%.3f ", t[j]);
    }
    if (verbose)
        printf("[%.3f]\n", noise(t, NTRIALS));
    /* If there is too much noise, discard the test */
    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
        *avgTimeUser = 0;
        *avgTimeMPI = 0;
        if (verbose)
            printf("Too much noise; discarding measurement\n");
        return 0;
    }
    *avgTimeUser = mean(t, NTRIALS) / N_REPS;

    /* MPI Index code */
    MPI_Type_create_indexed_block(n, 1, displs, MPI_DOUBLE, &indextype);
    MPI_Type_commit(&indextype);

    free(displs);

    if (verbose)
        printf("TestIndexPackDouble (MPI): ");
    for (j = 0; j < NTRIALS; j++) {
        t1 = MPI_Wtime();
        for (rep = 0; rep < N_REPS; rep++) {
            position = 0;
            MPI_Pack((void *) src, 1, indextype, dest, n * sizeof(double),
                     &position, MPI_COMM_SELF);
        }
        t2 = MPI_Wtime() - t1;
        t[j] = t2;
        if (verbose)
            printf("%.3f ", t[j]);
    }
    if (verbose)
        printf("[%.3f]\n", noise(t, NTRIALS));
    /* If there is too much noise, discard the test */
    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
        *avgTimeUser = 0;
        *avgTimeMPI = 0;
        if (verbose)
            printf("Too much noise; discarding measurement\n");
    }
    else {
        *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
    }
    MPI_Type_free(&indextype);

    return 0;
}
int main(int argc, char **argv)
{
    int nprocs, mpi_err, *array;
    int getval, disp, errs = 0;
    MPI_Win win;
    MPI_Datatype type;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs != 1) {
        printf("Run this program with 1 process\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* To improve reporting of problems about operations, we change the
       error handler to errors return */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* create an indexed datatype that points to the second integer
       in an array (the first integer is skipped). */
    disp = 1;
    mpi_err = MPI_Type_create_indexed_block(1, 1, &disp, MPI_INT, &type);
    if (mpi_err != MPI_SUCCESS)
        goto err_return;
    mpi_err = MPI_Type_commit(&type);
    if (mpi_err != MPI_SUCCESS)
        goto err_return;

    /* allocate window of size 2 integers */
    mpi_err = MPI_Alloc_mem(2 * sizeof(int), MPI_INFO_NULL, &array);
    if (mpi_err != MPI_SUCCESS)
        goto err_return;

    /* create window object */
    mpi_err = MPI_Win_create(array, 2 * sizeof(int), sizeof(int),
                             MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    if (mpi_err != MPI_SUCCESS)
        goto err_return;

    /* initialize array */
    array[0] = 100;
    array[1] = 200;

    getval = 0;

    /* To improve reporting of problems about operations, we change the
       error handler to errors return */
    MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);

    mpi_err = MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);
    if (mpi_err != MPI_SUCCESS)
        goto err_return;

    /* get the current value of element array[1] */
    mpi_err = MPI_Get(&getval, 1, MPI_INT, 0, 0, 1, type, win);
    if (mpi_err != MPI_SUCCESS)
        goto err_return;

    mpi_err = MPI_Win_unlock(0, win);
    if (mpi_err != MPI_SUCCESS)
        goto err_return;

    /* getval should contain the value of array[1] */
    if (getval != array[1]) {
        errs++;
        printf("getval=%d, should be %d\n", getval, array[1]);
    }

    MPI_Free_mem(array);
    MPI_Win_free(&win);
    MPI_Type_free(&type);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;

  err_return:
    printf("MPI function returned an error\n");
    MTestPrintError(mpi_err);
    errs++;
    MTest_Finalize(errs);
    MPI_Finalize();
    return 1;
}
int main(void)
{
    MPI_Init(NULL, NULL);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size == 1) {
        MPI_Datatype sends[2], recvs[2];
        {
            int count = 2, blocklen = 2, stride = 4;
            MPI_Type_vector(count, blocklen, stride, MPI_INT, &recvs[0]);
            MPI_Type_commit(&recvs[0]);
        }
        {
            int count = 1;
            int blocklength = 4;
            int array_of_displacements[] = { 4 };
            MPI_Type_create_indexed_block(count, blocklength, array_of_displacements,
                                          MPI_INT, &sends[0]);
            MPI_Type_commit(&sends[0]);
        }
        {
            int count = 1;
            int blocklength = 4;
            int array_of_displacements[] = { 4 };
            MPI_Type_create_indexed_block(count, blocklength, array_of_displacements,
                                          MPI_INT, &recvs[1]);
            MPI_Type_commit(&recvs[1]);
        }
        {
            int count = 2, blocklen = 2, stride = 4;
            MPI_Type_vector(count, blocklen, stride, MPI_INT, &sends[1]);
            MPI_Type_commit(&sends[1]);
        }
        {
            int raw_input[24] = { 0, 1, 2, 3, 4, 5, 6, 7,
                                  -2, -2, -2, -2, -2, -2, -2, -2,
                                  8, 9, 10, 11, 12, 13, 14, 15 };
            int *input_1 = &raw_input[0], *input_2 = &raw_input[16];
            int *inputs[2] = { input_1, input_2 };
            puts("first tests:");
            do_test(recvs, sends, inputs);
        }
        {
            int raw_input[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15 };
            int *input_1 = &raw_input[0], *input_2 = &raw_input[8];
            int *inputs[2] = { input_1, input_2 };
            puts("second tests:");
            do_test(recvs, sends, inputs);
        }
        MPI_Type_free(&sends[1]);
        MPI_Type_free(&recvs[1]);
        MPI_Type_free(&sends[0]);
        MPI_Type_free(&recvs[0]);
    }

    MPI_Finalize();
    return 0;
}
int MPICH_AlltoAll_short(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
                         MPI_Comm comm)
{
    int comm_size, i, pof2;
    MPI_Aint sendtype_extent, recvtype_extent;
    int mpi_errno = MPI_SUCCESS, src, dst, rank, nbytes;
    MPI_Status status;
    void *tmp_buf;
    int sendtype_size, pack_size, block, position, *displs, count;
    MPI_Datatype newtype;
    MPI_Aint recvtype_true_extent, recvbuf_extent, recvtype_true_lb;

    if (sendcount == 0)
        return MPI_SUCCESS;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

    /* Get extent of send and recv types */
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
    MPID_Datatype_get_size_macro(sendtype, sendtype_size);
    nbytes = sendtype_size * sendcount;

    /* use the indexing algorithm by Jehoshua Bruck et al,
     * IEEE TPDS, Nov. 97 */

    /* allocate temporary buffer */
    MPI_Pack_size(recvcount * comm_size, recvtype, comm, &pack_size);
    tmp_buf = malloc(pack_size);
    CkAssert(tmp_buf);

    /* Do Phase 1 of the algorithm.  Shift the data blocks on process i
     * upwards by a distance of i blocks.  Store the result in recvbuf. */
    MPICH_Localcopy((char *) sendbuf + rank * sendcount * sendtype_extent,
                    (comm_size - rank) * sendcount, sendtype,
                    recvbuf, (comm_size - rank) * recvcount, recvtype);
    MPICH_Localcopy(sendbuf, rank * sendcount, sendtype,
                    (char *) recvbuf + (comm_size - rank) * recvcount * recvtype_extent,
                    rank * recvcount, recvtype);
    /* Input data is now stored in recvbuf with datatype recvtype */

    /* Now do Phase 2, the communication phase.  It takes ceiling(lg p)
       steps.  In each step i, each process sends to rank+2^i and receives
       from rank-2^i, and exchanges all data blocks whose ith bit is 1. */

    /* allocate displacements array for indexed datatype used in
       communication */
    displs = (int *) malloc(comm_size * sizeof(int));
    CkAssert(displs);

    pof2 = 1;
    while (pof2 < comm_size) {
        dst = (rank + pof2) % comm_size;
        src = (rank - pof2 + comm_size) % comm_size;

        /* Exchange all data blocks whose ith bit is 1 */
        /* Create an indexed datatype for the purpose */
        count = 0;
        for (block = 1; block < comm_size; block++) {
            if (block & pof2) {
                displs[count] = block * recvcount;
                count++;
            }
        }

        mpi_errno = MPI_Type_create_indexed_block(count, recvcount, displs,
                                                  recvtype, &newtype);
        if (mpi_errno)
            return mpi_errno;
        mpi_errno = MPI_Type_commit(&newtype);
        if (mpi_errno)
            return mpi_errno;

        position = 0;
        mpi_errno = MPI_Pack(recvbuf, 1, newtype, tmp_buf, pack_size, &position, comm);

        mpi_errno = AMPI_Sendrecv(tmp_buf, position, MPI_PACKED, dst, MPI_ATA_TAG,
                                  recvbuf, 1, newtype, src, MPI_ATA_TAG, comm,
                                  MPI_STATUS_IGNORE);
        if (mpi_errno)
            return mpi_errno;

        mpi_errno = MPI_Type_free(&newtype);
        if (mpi_errno)
            return mpi_errno;

        pof2 *= 2;
    }

    free(displs);
    free(tmp_buf);

    /* Rotate blocks in recvbuf upwards by (rank + 1) blocks.  Need
     * a temporary buffer of the same size as recvbuf. */

    /* get true extent of recvtype */
    mpi_errno = MPI_Type_get_true_extent(recvtype, &recvtype_true_lb,
                                         &recvtype_true_extent);
    if (mpi_errno)
        return mpi_errno;

    recvbuf_extent = recvcount * comm_size * (MAX(recvtype_true_extent, recvtype_extent));
    tmp_buf = malloc(recvbuf_extent);
    CkAssert(tmp_buf);
    /* adjust for potential negative lower bound in datatype */
    tmp_buf = (void *) ((char *) tmp_buf - recvtype_true_lb);

    MPICH_Localcopy((char *) recvbuf + (rank + 1) * recvcount * recvtype_extent,
                    (comm_size - rank - 1) * recvcount, recvtype,
                    tmp_buf, (comm_size - rank - 1) * recvcount, recvtype);
    MPICH_Localcopy(recvbuf, (rank + 1) * recvcount, recvtype,
                    (char *) tmp_buf + (comm_size - rank - 1) * recvcount * recvtype_extent,
                    (rank + 1) * recvcount, recvtype);

    /* Blocks are in the reverse order now (comm_size-1 to 0).
     * Reorder them to (0 to comm_size-1) and store them in recvbuf. */
    for (i = 0; i < comm_size; i++)
        MPICH_Localcopy((char *) tmp_buf + i * recvcount * recvtype_extent,
                        recvcount, recvtype,
                        (char *) recvbuf + (comm_size - i - 1) * recvcount * recvtype_extent,
                        recvcount, recvtype);

    free((char *) tmp_buf + recvtype_true_lb);
    return mpi_errno;
}
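/* A small illustrative driver (plain C, no MPI required; comm_size = 8 is an
 * assumed value) for the Phase 2 block selection above: at step pof2 = 2^i,
 * exactly the blocks whose index has bit i set are packed and exchanged.
 * Running it prints the displacement lists the loop above would hand to
 * MPI_Type_create_indexed_block() (shown here with recvcount = 1). */
#include <stdio.h>

int main(void)
{
    int comm_size = 8;  /* assumed process count */
    int pof2, block;

    for (pof2 = 1; pof2 < comm_size; pof2 *= 2) {
        printf("step pof2=%d exchanges blocks:", pof2);
        for (block = 1; block < comm_size; block++)
            if (block & pof2)
                printf(" %d", block);
        printf("\n");
    }
    /* prints: step 1 -> blocks 1 3 5 7; step 2 -> 2 3 6 7; step 4 -> 4 5 6 7 */
    return 0;
}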
/* regression for tt#1030, checks for bad offset math in the
 * blockindexed and indexed dataloop flattening code */
int flatten_test(void)
{
    int err, errs = 0;
#define ARR_SIZE (9)
    /* real indices              0  1  2  3  4  5  6  7  8
     * indices w/ &array[3]     -3 -2 -1  0  1  2  3  4  5 */
    int array[ARR_SIZE] = { -1, -1, -1, -1, -1, -1, -1, -1, -1 };
    int expected[ARR_SIZE] = { -1, 0, 1, -1, 2, -1, 3, -1, 4 };
    MPI_Datatype idx_type = MPI_DATATYPE_NULL;
    MPI_Datatype blkidx_type = MPI_DATATYPE_NULL;
    MPI_Datatype combo = MPI_DATATYPE_NULL;
#define COUNT (2)
    int displ[COUNT];
    MPI_Aint adispl[COUNT];
    int blens[COUNT];
    MPI_Datatype types[COUNT];

    /* indexed type layout:
     * XX_X
     * 2101  <-- pos (left of 0 is neg)
     *
     * different blens to prevent optimization into a blockindexed */
    blens[0] = 2;
    displ[0] = -2;      /* elements, puts byte after block end at 0 */
    blens[1] = 1;
    displ[1] = 1;       /* elements */

    err = MPI_Type_indexed(COUNT, blens, displ, MPI_INT, &idx_type);
    check_err(MPI_Type_indexed);
    err = MPI_Type_commit(&idx_type);
    check_err(MPI_Type_commit);

    /* indexed type layout:
     * _X_X
     * 2101  <-- pos (left of 0 is neg) */
    displ[0] = -1;
    displ[1] = 1;

    err = MPI_Type_create_indexed_block(COUNT, 1, displ, MPI_INT, &blkidx_type);
    check_err(MPI_Type_create_indexed_block);
    err = MPI_Type_commit(&blkidx_type);
    check_err(MPI_Type_commit);

    /* struct type layout:
     * II_I_B_B  (I=idx_type, B=blkidx_type)
     * 21012345  <-- pos (left of 0 is neg) */
    blens[0] = 1;
    adispl[0] = 0;      /* bytes */
    types[0] = idx_type;

    blens[1] = 1;
    adispl[1] = 4 * sizeof(int);        /* bytes */
    types[1] = blkidx_type;

    /* must be a struct in order to trigger flattening code */
    err = MPI_Type_create_struct(COUNT, blens, adispl, types, &combo);
    check_err(MPI_Type_create_struct);
    err = MPI_Type_commit(&combo);
    check_err(MPI_Type_commit);

    /* pack/unpack with &array[3] */
    errs += pack_and_check_expected(combo, "combo", 3, ARR_SIZE, array, expected);

    MPI_Type_free(&combo);
    MPI_Type_free(&idx_type);
    MPI_Type_free(&blkidx_type);

    return errs;
#undef COUNT
}
FORT_DLL_SPEC void FORT_CALL mpi_type_create_indexed_block_(MPI_Fint *v1, MPI_Fint *v2, MPI_Fint v3[], MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *ierr)
{
    *ierr = MPI_Type_create_indexed_block((int) *v1, (int) *v2, v3,
                                          (MPI_Datatype) (*v4),
                                          (MPI_Datatype *) (v5));
}
/** Optimized implementation of the ARMCI IOV operation that uses an MPI
  * datatype to achieve a one-sided gather/scatter.  Does not use MPI_BOTTOM
  * for the origin (local) datatype.
  */
int ARMCII_Iov_op_datatype_no_bottom(enum ARMCII_Op_e op, void **src, void **dst,
                                     int count, int elem_count, MPI_Datatype type, int proc)
{
    gmr_t *mreg;
    MPI_Datatype type_loc, type_rem;
    MPI_Aint disp_loc[count];
    int disp_rem[count];
    int block_len[count];
    void *dst_win_base;
    int dst_win_size, i, type_size;
    void **buf_rem, **buf_loc;
    MPI_Aint base_rem;
    MPI_Aint base_loc;
    void *base_loc_ptr;

    switch (op) {
        case ARMCII_OP_ACC:
        case ARMCII_OP_PUT:
            buf_rem = dst;
            buf_loc = src;
            break;
        case ARMCII_OP_GET:
            buf_rem = src;
            buf_loc = dst;
            break;
        default:
            ARMCII_Error("unknown operation (%d)", op);
            return 1;
    }

    MPI_Type_size(type, &type_size);

    mreg = gmr_lookup(buf_rem[0], proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid remote pointer");

    dst_win_base = mreg->slices[proc].base;
    dst_win_size = mreg->slices[proc].size;

    MPI_Get_address(dst_win_base, &base_rem);

    /* Pick a base address for the start of the origin's datatype */
    base_loc_ptr = buf_loc[0];
    MPI_Get_address(base_loc_ptr, &base_loc);

    for (i = 0; i < count; i++) {
        MPI_Aint target_rem, target_loc;
        MPI_Get_address(buf_loc[i], &target_loc);
        MPI_Get_address(buf_rem[i], &target_rem);
        disp_loc[i]  = target_loc - base_loc;
        disp_rem[i]  = (target_rem - base_rem) / type_size;
        block_len[i] = elem_count;

        ARMCII_Assert_msg((target_rem - base_rem) % type_size == 0,
                          "Transfer size is not a multiple of type size");
        ARMCII_Assert_msg(disp_rem[i] >= 0 && disp_rem[i] < dst_win_size,
                          "Invalid remote pointer");
        ARMCII_Assert_msg(((uint8_t *) buf_rem[i]) + block_len[i] <=
                          ((uint8_t *) dst_win_base) + dst_win_size,
                          "Transfer exceeds buffer length");
    }

    MPI_Type_create_hindexed(count, block_len, disp_loc, type, &type_loc);
    MPI_Type_create_indexed_block(count, elem_count, disp_rem, type, &type_rem);
    /* MPI_Type_indexed(count, block_len, disp_rem, type, &type_rem); */

    MPI_Type_commit(&type_loc);
    MPI_Type_commit(&type_rem);

    gmr_lock(mreg, proc);

    switch (op) {
        case ARMCII_OP_ACC:
            gmr_accumulate_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
            break;
        case ARMCII_OP_PUT:
            gmr_put_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
            break;
        case ARMCII_OP_GET:
            gmr_get_typed(mreg, MPI_BOTTOM, 1, type_rem, base_loc_ptr, 1, type_loc, proc);
            break;
        default:
            ARMCII_Error("unknown operation (%d)", op);
            return 1;
    }

    gmr_unlock(mreg, proc);

    MPI_Type_free(&type_loc);
    MPI_Type_free(&type_rem);

    return 0;
}
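/* A hedged standalone sketch (plain MPI, not ARMCI internals; window size,
 * offsets, and the self-targeting are illustrative assumptions) of the
 * pairing used above: an hindexed origin type with byte displacements
 * relative to the first local buffer, and an indexed_block target type with
 * element displacements relative to the window base, driven by one MPI_Put.
 * Like the "no_bottom" variant, the origin side passes a real address
 * instead of MPI_BOTTOM. */
#include <mpi.h>

int main(int argc, char **argv)
{
    const int n = 3, elem_count = 2;
    double local[16], *win_mem;
    MPI_Aint disp_loc[3], base;
    int disp_rem[3] = { 0, 4, 8 };      /* element offsets in the target window */
    int block_len[3] = { 2, 2, 2 };     /* elem_count elements per block */
    MPI_Datatype type_loc, type_rem;
    MPI_Win win;
    int i, rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Win_allocate(16 * sizeof(double), sizeof(double), MPI_INFO_NULL,
                     MPI_COMM_WORLD, &win_mem, &win);

    for (i = 0; i < 16; i++)
        local[i] = rank + i;

    /* origin displacements are in bytes, relative to &local[0] */
    MPI_Get_address(&local[0], &base);
    for (i = 0; i < n; i++) {
        MPI_Aint a;
        MPI_Get_address(&local[i * 5], &a);     /* assumed non-uniform origin layout */
        disp_loc[i] = a - base;
    }

    MPI_Type_create_hindexed(n, block_len, disp_loc, MPI_DOUBLE, &type_loc);
    MPI_Type_create_indexed_block(n, elem_count, disp_rem, MPI_DOUBLE, &type_rem);
    MPI_Type_commit(&type_loc);
    MPI_Type_commit(&type_rem);

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win);     /* self-target for brevity */
    MPI_Put(&local[0], 1, type_loc, rank, 0, 1, type_rem, win);
    MPI_Win_unlock(rank, win);

    MPI_Type_free(&type_loc);
    MPI_Type_free(&type_rem);
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}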