int MPI_Type_hvector(int count, int blocklength, MPI_Aint stride,
                     MPI_Datatype oldtype, MPI_Datatype *newtype)
{
    if ( MPI_PARAM_CHECK ) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        if (NULL == oldtype || MPI_DATATYPE_NULL == oldtype ||
            NULL == newtype) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME );
        } else if (count < 0) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME );
        } else if (blocklength < 0) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME );
        }
    }

    return MPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype);
}
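/*
 * Added illustration (not part of the Open MPI source above): a minimal,
 * self-contained sketch of how a caller typically uses the hvector routine
 * that the wrapper above validates.  It builds a datatype describing one
 * column of a row-major NROWS x NCOLS double array; NROWS, NCOLS and
 * column_type_demo are names invented for this sketch only.
 */
#include <mpi.h>

#define NROWS 4
#define NCOLS 5

static void column_type_demo(void)
{
    MPI_Datatype column;

    /* NROWS blocks of 1 double each, consecutive blocks NCOLS*sizeof(double)
       bytes apart, i.e. one element per row of a row-major matrix */
    MPI_Type_create_hvector(NROWS, 1, (MPI_Aint)(NCOLS * sizeof(double)),
                            MPI_DOUBLE, &column);
    MPI_Type_commit(&column);

    /* e.g. send column 2 of double a[NROWS][NCOLS]:
       MPI_Send(&a[0][2], 1, column, dest, tag, MPI_COMM_WORLD); */

    MPI_Type_free(&column);
}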
static void _mpi_make_types(MPI_Datatype types[], const int dims,
                            const _XMP_array_section_t sections[],
                            const size_t element_size)
{
    for (int i = dims - 1; i >= 0; i--) {
        const _XMP_array_section_t *section = sections + i;
        const int count = section->length;
        const int blocklength = (i == dims - 1) ? element_size : 1;
        const int stride = section->distance * section->stride;
        const MPI_Datatype oldtype = (i == dims - 1) ? MPI_BYTE : types[i + 1];

        XACC_DEBUG("type, dim=%d, start=%lld, length=%lld, stride=%lld, (c,b,s)=(%d,%d,%d)\n",
                   i, section->start, section->length, section->stride,
                   count, blocklength, stride);
        MPI_Type_create_hvector(count, blocklength, stride, oldtype, types + i);
        MPI_Type_commit(types + i);
    }
}
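/*
 * Added illustration (not from the XMP runtime above): the same
 * innermost-to-outermost hvector nesting, written out for a fixed 2-D case
 * without the _XMP_array_section_t structs.  The shape and strides are
 * invented for this sketch: take 3 single-byte elements, 2 bytes apart, from
 * each of 4 rows of a row-major array whose rows are 10 bytes apart.
 */
#include <mpi.h>

static void nested_hvector_demo(MPI_Datatype *newtype)
{
    MPI_Datatype inner;

    /* innermost dimension: 3 single-byte elements, 2 bytes apart */
    MPI_Type_create_hvector(3, 1, 2, MPI_BYTE, &inner);
    /* outer dimension: 4 rows, one inner pattern per row, rows 10 bytes apart */
    MPI_Type_create_hvector(4, 1, 10, inner, newtype);
    MPI_Type_commit(newtype);
    MPI_Type_free(&inner);  /* safe: newtype keeps its own reference */
}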
/*
 * Set up hvector type info and handlers.
 *
 * An hvector datatype is created using the following parameters:
 *   nblock:   Number of blocks.
 *   blocklen: Number of elements in each block.
 *   stride:   Number of elements between the starts of consecutive blocks.
 *   lb:       Lower bound of the new datatype (ignored).
 *   oldtype:  Datatype of each element.
 */
static int MTestTypeHvectorCreate(MPI_Aint nblock, MPI_Aint blocklen, MPI_Aint stride,
                                  MPI_Aint lb, MPI_Datatype oldtype,
                                  const char *typename_prefix, MTestDatatype *mtype)
{
    int merr;
    char type_name[128];

    MTestTypeReset(mtype);

    merr = MPI_Type_size(oldtype, &mtype->basesize);
    if (merr) MTestPrintError(merr);

    /* These sizes are in bytes (see the VectorInit code) */
    mtype->stride = stride * mtype->basesize;
    mtype->blksize = blocklen * mtype->basesize;
    mtype->nblock = nblock;

    /* Hvector uses stride in bytes */
    merr = MPI_Type_create_hvector(nblock, blocklen, mtype->stride, oldtype,
                                   &mtype->datatype);
    if (merr) MTestPrintError(merr);
    merr = MPI_Type_commit(&mtype->datatype);
    if (merr) MTestPrintError(merr);

    memset(type_name, 0, sizeof(type_name));
    sprintf(type_name, "%s %s (%ld nblock %ld blocklen %ld stride)",
            typename_prefix, "hvector", nblock, blocklen, stride);
    merr = MPI_Type_set_name(mtype->datatype, (char *) type_name);
    if (merr) MTestPrintError(merr);

    /* Use the same functions as vector, because mtype->stride is in bytes */
    mtype->InitBuf = MTestTypeVectorInit;
    mtype->FreeBuf = MTestTypeFree;
    mtype->CheckBuf = MTestTypeVectorCheckbuf;
    return merr;
}
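/*
 * Added illustration (hypothetical call, assuming the MTestDatatype struct and
 * MTest helpers declared elsewhere in this test harness): build an hvector of
 * 10 blocks, each 4 MPI_INTs, with block starts 8 ints apart.
 *
 *     MTestDatatype mt;
 *     MTestTypeHvectorCreate(10, 4, 8, 0, MPI_INT, "send", &mt);
 *     // mt.datatype now describes 10 blocks of 4 ints, spaced 8 ints apart
 *     MPI_Type_free(&mt.datatype);
 */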
int main( int argc, char **argv )
{
    int          vcount = 16, vblock = vcount*vcount/2, vstride = 2*vcount*vblock;
    int          v2stride, typesize, packsize, i, position, errs = 0;
    char         *inbuf, *outbuf, *outbuf2;
    MPI_Datatype ft1type, ft2type, ft3type;
    MPI_Datatype ftopttype;
    MPI_Aint     lb, extent;
    double       t0, t1;
    double       tpack, tmanual, tpackopt;
    int          ntry;

    MPI_Init( &argc, &argv );

    MPI_Type_contiguous( 6, MPI_FLOAT, &ft1type );
    MPI_Type_size( ft1type, &typesize );
    v2stride = vcount * vcount * vcount * vcount * typesize;
    MPI_Type_vector( vcount, vblock, vstride, ft1type, &ft2type );
    MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ft3type );
    MPI_Type_commit( &ft3type );
    MPI_Type_free( &ft1type );
    MPI_Type_free( &ft2type );
#if defined(MPICH2) && defined(PRINT_DATATYPE_INTERNALS)
    /* To use MPIDU_Datatype_debug to print the datatype internals,
       you must configure MPICH2 with --enable-g=log */
    if (verbose) {
        printf( "Original datatype:\n" );
        MPIDU_Datatype_debug( ft3type, 10 );
    }
#endif
    /* The same type, but without using the contiguous type */
    MPI_Type_vector( vcount, 6*vblock, 6*vstride, MPI_FLOAT, &ft2type );
    MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ftopttype );
    MPI_Type_commit( &ftopttype );
    MPI_Type_free( &ft2type );
#if defined(MPICH2) && defined(PRINT_DATATYPE_INTERNALS)
    if (verbose) {
        printf( "\n\nMerged datatype:\n" );
        MPIDU_Datatype_debug( ftopttype, 10 );
    }
#endif

    MPI_Type_get_extent( ft3type, &lb, &extent );
    MPI_Type_size( ft3type, &typesize );
    MPI_Pack_size( 1, ft3type, MPI_COMM_WORLD, &packsize );

    inbuf   = (char *)malloc( extent );
    outbuf  = (char *)malloc( packsize );
    outbuf2 = (char *)malloc( packsize );
    if (!inbuf) {
        fprintf( stderr, "Unable to allocate %ld for inbuf\n", (long)extent );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    if (!outbuf) {
        fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    if (!outbuf2) {
        fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    for (i=0; i<extent; i++) {
        inbuf[i] = i & 0x7f;
    }
    position = 0;

    /* Warm up the code and data */
    MPI_Pack( inbuf, 1, ft3type, outbuf, packsize, &position, MPI_COMM_WORLD );

    /* Pack using the vector of vector of contiguous */
    tpack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        position = 0;
        t0 = MPI_Wtime();
        MPI_Pack( inbuf, 1, ft3type, outbuf, packsize, &position, MPI_COMM_WORLD );
        t1 = MPI_Wtime() - t0;
        if (t1 < tpack) tpack = t1;
    }
    MPI_Type_free( &ft3type );

    /* Pack using vector of vector with big blocks (same type map) */
    tpackopt = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        position = 0;
        t0 = MPI_Wtime();
        MPI_Pack( inbuf, 1, ftopttype, outbuf, packsize, &position, MPI_COMM_WORLD );
        t1 = MPI_Wtime() - t0;
        if (t1 < tpackopt) tpackopt = t1;
    }
    MPI_Type_free( &ftopttype );

    /* User (manual) packing code.
       Note that we exploit the fact that the vector type contains vblock
       instances of a contiguous type of size 24, or equivalently a
       single block of 24*vblock bytes.
    */
    tmanual = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        const char *ppe = (const char *)inbuf;
        int k, j;
        t0 = MPI_Wtime();
        position = 0;
        for (k=0; k<2; k++) {          /* hvector count; blocksize is 1 */
            const char *ptr = ppe;
            for (j=0; j<vcount; j++) { /* vector count */
                memcpy( outbuf2 + position, ptr, 24*vblock );
                ptr      += vstride * 24;
                position += 24*vblock;
            }
            ppe += v2stride;
        }
        t1 = MPI_Wtime() - t0;
        if (t1 < tmanual) tmanual = t1;

        /* Check on correctness */
#ifdef PACK_IS_NATIVE
        if (memcmp( outbuf, outbuf2, position ) != 0) {
            printf( "Panic - pack buffers differ\n" );
        }
#endif
    }

    if (verbose) {
        printf( "Bytes packed = %d\n", position );
        printf( "MPI_Pack time = %e, opt version = %e, manual pack time = %e\n",
                tpack, tpackopt, tmanual );
    }

    /* A factor of 4 is extremely generous, especially since the test suite
       no longer builds any of the tests with optimization */
    if (4 * tmanual < tpack) {
        errs++;
        printf( "MPI_Pack time = %e, manual pack time = %e\n", tpack, tmanual );
        printf( "MPI_Pack time should be less than 4 times the manual time\n" );
        printf( "For most informative results, be sure to compile this test with optimization\n" );
    }
    if (4 * tmanual < tpackopt) {
        errs++;
        printf( "MPI_Pack with opt = %e, manual pack time = %e\n", tpackopt, tmanual );
        printf( "MPI_Pack time should be less than 4 times the manual time\n" );
        printf( "For most informative results, be sure to compile this test with optimization\n" );
    }

    if (errs) {
        printf( " Found %d errors\n", errs );
    }
    else {
        printf( " No Errors\n" );
    }

    free( inbuf );
    free( outbuf );
    free( outbuf2 );
    MPI_Finalize();
    return 0;
}
int MPI_Type_hvector(int count, int length, int stride,
                     MPI_Datatype oldtype, MPI_Datatype *newtype)
{
    return MPI_Type_create_hvector(count, length, stride, oldtype, newtype);
}
int main(int argc, char **argv)
{
    MPI_File fh;
    MPI_Datatype file_type, mem_type;
    int *data = NULL;
    int *verify = NULL;
    int data_size = DATA_SIZE;
    int i, j, k, nr_errors = 0;
    MPI_Aint disp[BLK_COUNT];
    int block_lens[BLK_COUNT];
    char *filename = "unnamed.dat";
    MPI_Status status;
    MPI_Request request;

    MPI_Init(&argc, &argv);
    disp[0] = (MPI_Aint) (PAD);
    disp[1] = (MPI_Aint) (data_size * 1 + PAD);
    disp[2] = (MPI_Aint) (data_size * 2 + PAD);
    block_lens[0] = data_size;
    block_lens[1] = data_size;
    block_lens[2] = data_size;

    data = malloc(data_size);
    verify = malloc(data_size * BLK_COUNT + HEADER + PAD);
    for (i = 0; i < data_size / sizeof(int); i++)
        data[i] = i;

    MPI_Type_create_hindexed_block(BLK_COUNT, data_size, disp, MPI_BYTE, &file_type);
    MPI_Type_commit(&file_type);
    MPI_Type_create_hvector(BLK_COUNT, data_size, 0, MPI_BYTE, &mem_type);
    MPI_Type_commit(&mem_type);

    if (1 < argc)
        filename = argv[1];

    CHECK(MPI_File_open(MPI_COMM_WORLD, filename,
                        MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE,
                        MPI_INFO_NULL, &fh) != 0);
    CHECK(MPI_File_set_view(fh, HEADER, MPI_BYTE, file_type, "native", MPI_INFO_NULL));

    /* write everything */
    CHECK(MPI_File_iwrite_at_all(fh, 0, data, 1, mem_type, &request));
    MPI_Wait(&request, &status);

    /* verify */
    CHECK(MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL));
    CHECK(MPI_File_iread_at_all(fh, 0, verify,
                                (HEADER + PAD + BLK_COUNT * DATA_SIZE) / sizeof(int),
                                MPI_INT, &request));
    MPI_Wait(&request, &status);

    /* header and block padding should have no data */
    for (i = 0; i < (HEADER + PAD) / sizeof(int); i++) {
        if (verify[i] != 0) {
            nr_errors++;
            fprintf(stderr, "expected 0, read %d\n", verify[i]);
        }
    }
    /* blocks are replicated */
    for (j = 0; j < BLK_COUNT; j++) {
        for (k = 0; k < (DATA_SIZE / sizeof(int)); k++) {
            if (verify[(HEADER + PAD) / sizeof(int) + k + j * (DATA_SIZE / sizeof(int))]
                != data[k]) {
                nr_errors++;
                fprintf(stderr, "expected %d, read %d\n", data[k],
                        verify[(HEADER + PAD) / sizeof(int) + k + j * (DATA_SIZE / sizeof(int))]);
            }
            i++;
        }
    }
    MPI_File_close(&fh);

    MPI_Type_free(&mem_type);
    MPI_Type_free(&file_type);

    if (nr_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();

    free(data);
    free(verify);
    return 0;
}
int main ( int argc, char *argv[] )
{
    // Solution arrays
    real *h_u;  /* to be allocated in ROOT only */
    real *t_u;
    real *t_un;

    // Auxiliary variables
    int rank;
    int size;
    int step;
    dmn domain;
    double wtime;
    int nbrs[6];
    int i, j, k;

    // Initialize MPI
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); // needed before the ROOT-only error messages below

    // if number of np != Sx*Sy*Sz then terminate.
    if (size != SX*SY*SZ) {
        if (rank==ROOT)
            fprintf(stderr,"%s: Needs exactly %d processors.\n", argv[0], SX*SY*SZ);
        MPI_Finalize();
        return 1;
    }
    // verify subsizes
    if (NX%SX!=0 || NY%SY!=0 || NZ%SZ!=0) {
        if (rank==ROOT)
            fprintf(stderr,"%s: Subdomain sizes are not integer values.\n", argv[0]);
        MPI_Finalize();
        return 1;
    }

    // Build a 3D Cartesian communicator
    MPI_Comm Comm3d;
    int ndim=3;
    int dim[3]={SZ,SY,SX};             // domain decomposition subdomains
    int period[3]={false,false,false}; // periodic conditions
    int reorder=true;                  // allow reorder if necessary
    int coord[3];
    MPI_Cart_create(MPI_COMM_WORLD,ndim,dim,period,reorder,&Comm3d);
    MPI_Comm_rank(Comm3d,&rank);           // rank w.r.t. Comm3d
    MPI_Cart_coords(Comm3d,rank,3,coord);  // rank coordinates

    // Map the neighbours ranks
    MPI_Cart_shift(Comm3d,0,1,&nbrs[TOP],&nbrs[BOTTOM]);
    MPI_Cart_shift(Comm3d,1,1,&nbrs[NORTH],&nbrs[SOUTH]);
    MPI_Cart_shift(Comm3d,2,1,&nbrs[WEST],&nbrs[EAST]);

    // Manage Domain sizes
    domain = Manage_Domain(rank,size,coord,nbrs);

    // Allocate Memory
    Manage_Memory(0,domain,&h_u,&t_u,&t_un);

    // Root mode: Build Initial Condition
    if (domain.rank==ROOT) Call_IC(2,h_u);

    // Build MPI data types
    MPI_Datatype myGlobal;
    MPI_Datatype myLocal;
    MPI_Datatype xySlice;
    MPI_Datatype yzSlice;
    MPI_Datatype xzSlice;
    //Manage_DataTypes(0,domain,&xySlice,&yzSlice,&xzSlice,&myLocal,&myGlobal);

    // Build an MPI data type for a subarray in the Root processor
    MPI_Datatype global;
    int nx = domain.nx;
    int ny = domain.ny;
    int nz = domain.nz;
    int bigsizes[3] = {NZ,NY,NX};
    int subsizes[3] = {nz,ny,nx};
    int starts[3]   = {0,0,0};
    MPI_Type_create_subarray(3, bigsizes, subsizes, starts, MPI_ORDER_C, MPI_CUSTOM_REAL, &global);
    MPI_Type_create_resized(global, 0, nx*sizeof(real), &myGlobal); // resize the extent
    MPI_Type_commit(&myGlobal);

    // Build an MPI data type for a subarray in workers
    int bigsizes2[3] = {R+nz+R,R+ny+R,R+nx+R};
    int subsizes2[3] = {nz,ny,nx};
    int starts2[3]   = {R,R,R};
    MPI_Type_create_subarray(3, bigsizes2, subsizes2, starts2, MPI_ORDER_C, MPI_CUSTOM_REAL, &myLocal);
    MPI_Type_commit(&myLocal); // now we can use this MPI custom data type

    // halo data types
    MPI_Datatype yVector;
    MPI_Type_vector( ny, nx, nx+2*R, MPI_CUSTOM_REAL, &xySlice);
    MPI_Type_commit(&xySlice);
    MPI_Type_vector( ny, 1, nx+2*R, MPI_CUSTOM_REAL, &yVector);
    MPI_Type_create_hvector(nz, 1, (nx+2*R)*(ny+2*R)*sizeof(real), yVector, &yzSlice);
    MPI_Type_commit(&yzSlice);
    MPI_Type_vector( nz, nx, (nx+2*R)*(ny+2*R), MPI_CUSTOM_REAL, &xzSlice);
    MPI_Type_commit(&xzSlice);

    // build sendcounts and displacements in root processor
    int sendcounts[size], displs[size];
    if (rank==ROOT) {
        for (i=0; i<size; i++) sendcounts[i]=1;
        int disp = 0; // displacement counter
        for (k=0; k<SZ; k++) {
            for (j=0; j<SY; j++) {
                for (i=0; i<SX; i++) {
                    displs[i+SX*j+SX*SY*k]=disp;
                    disp+=1; // x-displacements
                }
                disp += SX*(ny-1); // y-displacements
            }
            disp += SX*NY*(nz-1); // z-displacements
        }
    }

    // Scatter global array data and exchange halo regions
    MPI_Scatterv(h_u, sendcounts, displs, myGlobal, t_u, 1, myLocal, ROOT, Comm3d);
    Manage_Comms(domain,Comm3d,xySlice,yzSlice,xzSlice,t_u);
    MPI_Barrier(Comm3d);

    // ROOT mode: Record the starting time.
    if (rank==ROOT) wtime=MPI_Wtime();

    // Asynchronous MPI Solver
    for (step = 0; step < NO_STEPS; step+=2) {
        // print iteration in ROOT mode
        if (rank==ROOT && step%10000==0) printf(" Step %d of %d\n",step,(int)NO_STEPS);

        // Exchange Boundaries and compute stencil
        Call_Laplace(domain,&t_u,&t_un);
        Manage_Comms(domain,Comm3d,xySlice,yzSlice,xzSlice,t_un); // 1st iteration
        Call_Laplace(domain,&t_un,&t_u);
        Manage_Comms(domain,Comm3d,xySlice,yzSlice,xzSlice,t_u);  // 2nd iteration
    }

    // ROOT mode: Record the final time.
    if (rank==ROOT) {
        wtime = MPI_Wtime()-wtime;
        printf ("\n Wall clock elapsed = %f seconds\n\n", wtime );
    }

    /*
    // CAREFUL: uncomment only for debugging. Print subroutine
    for (int p=0; p<size; p++) {
        if (rank == p) {
            printf("Local process on rank %d is:\n", rank);
            for (k=0; k<nz+2*R; k++) {
                printf("-- layer %d --\n",k);
                for (j=0; j<ny+2*R; j++) {
                    putchar('|');
                    for (i=0; i<nx+2*R; i++) printf("%3.0f ",t_u[i+(nx+2*R)*j+(nx+2*R)*(ny+2*R)*k]);
                    printf("|\n");
                }
                printf("\n");
            }
        }
        MPI_Barrier(Comm3d);
    }
    */

    // gather all pieces into the big data array
    MPI_Gatherv(t_u, 1, myLocal, h_u, sendcounts, displs, myGlobal, ROOT, Comm3d);

    // save results to file
    //if (rank==0) Print(h_u,NX,NY,NZ);
    if (rank==ROOT) Save_Results(h_u);

    // Free MPI types
    Manage_DataTypes(1,domain,&xySlice,&yzSlice,&xzSlice,&myLocal,&myGlobal);

    // Free Memory
    Manage_Memory(1,domain,&h_u,&t_u,&t_un);

    // finalize MPI
    MPI_Finalize();

    // ROOT mode: Terminate.
    if (rank==ROOT) {
        printf ("HEAT_MPI:\n" );
        printf (" Normal end of execution.\n\n" );
    }
    return 0;
}
/*----< ncmpii_vars_create_filetype() >--------------------------------------*/
int
ncmpii_vars_create_filetype(NC               *ncp,
                            NC_var           *varp,
                            const MPI_Offset  start[],
                            const MPI_Offset  count[],
                            const MPI_Offset  stride[],
                            int               rw_flag,
                            MPI_Offset       *offset_ptr,
                            MPI_Datatype     *filetype_ptr)
{
    int          dim, status;
    MPI_Offset   offset, nelems=1;
    MPI_Datatype filetype;

    if (stride == NULL)
        return ncmpii_vara_create_filetype(ncp, varp, start, count, rw_flag,
                                           offset_ptr, filetype_ptr);
    offset   = varp->begin;
    filetype = MPI_BYTE;

    for (dim=0; dim<varp->ndims && stride[dim]==1; dim++) ;

    if (dim == varp->ndims)
        return ncmpii_vara_create_filetype(ncp, varp, start, count, rw_flag,
                                           offset_ptr, filetype_ptr);

    /* New coordinate/edge check to fix NC_EINVALCOORDS bug */
    status = NCedgeck(ncp, varp, start, count);
    if ((status != NC_NOERR) ||
        (rw_flag == READ_REQ && IS_RECVAR(varp) &&
         *start + *count > NC_get_numrecs(ncp)))
    {
        status = NCcoordck(ncp, varp, start);
        if (status != NC_NOERR)
            return status;
        else
            return NC_EEDGE;
    }

    status = NCstrideedgeck(ncp, varp, start, count, stride);
    if (status != NC_NOERR)
        return status;

    if ( rw_flag == READ_REQ && IS_RECVAR(varp) &&
        ( (*count > 0 && *start+1 + (*count-1) * *stride > NC_get_numrecs(ncp)) ||
          (*count == 0 && *start > NC_get_numrecs(ncp)) ) )
        return NC_EEDGE;

    for (dim=0; dim<varp->ndims; dim++) nelems *= count[dim];

    /* filetype is defined only when varp is not a scalar and the number of
       requested elements > 0
       (varp->ndims == 0 means this is a scalar variable)
       Otherwise, keep filetype MPI_BYTE */
    if (varp->ndims > 0 && nelems > 0) {
        int          ndims;
        MPI_Datatype tmptype;
        MPI_Offset  *blocklens, *blockstride, *blockcount;

        ndims       = varp->ndims;
        blocklens   = (MPI_Offset*) NCI_Malloc(3 * ndims * sizeof(MPI_Offset));
        blockstride = blocklens   + ndims;
        blockcount  = blockstride + ndims;

        tmptype = MPI_BYTE;

        blocklens[ndims-1]  = varp->xsz;
        blockcount[ndims-1] = count[ndims-1];
        if (ndims == 1 && IS_RECVAR(varp)) {
            check_recsize_too_big(ncp);
            blockstride[ndims-1] = stride[ndims-1] * ncp->recsize;
            offset += start[ndims-1] * ncp->recsize;
        } else {
            blockstride[ndims-1] = stride[ndims-1] * varp->xsz;
            offset += start[ndims-1] * varp->xsz;
        }

        for (dim=ndims-1; dim>=0; dim--) {
#if (MPI_VERSION < 2)
            MPI_Type_hvector(blockcount[dim], blocklens[dim], blockstride[dim],
                             tmptype, &filetype);
#else
            MPI_Type_create_hvector(blockcount[dim], blocklens[dim],
                                    blockstride[dim], tmptype, &filetype);
#endif
            MPI_Type_commit(&filetype);
            if (tmptype != MPI_BYTE)
                MPI_Type_free(&tmptype);
            tmptype = filetype;

            if (dim - 1 >= 0) {
                blocklens[dim-1]  = 1;
                blockcount[dim-1] = count[dim-1];
                if (dim-1 == 0 && IS_RECVAR(varp)) {
                    blockstride[dim-1] = stride[dim-1] * ncp->recsize;
                    offset += start[dim-1] * ncp->recsize;
                } else {
                    blockstride[dim-1] = stride[dim-1] * varp->dsizes[dim] * varp->xsz;
                    offset += start[dim-1] * varp->dsizes[dim] * varp->xsz;
                }
            }
        }
        NCI_Free(blocklens);
    }

    *offset_ptr   = offset;
    *filetype_ptr = filetype;

    return NC_NOERR;
}
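/*
 * Added illustration (not PnetCDF code): the dimension-by-dimension hvector
 * nesting used above, reduced to a fixed 2-D strided access.  All names
 * (shape, start, count, stride, esize, strided_2d_filetype) are local to this
 * sketch only.
 */
#include <mpi.h>

static MPI_Datatype strided_2d_filetype(const int shape[2], const int start[2],
                                        const int count[2], const int stride[2],
                                        int esize, MPI_Aint *offset)
{
    MPI_Datatype inner, outer;

    /* innermost dimension: count[1] blocks of esize bytes, element stride in bytes */
    MPI_Type_create_hvector(count[1], esize, (MPI_Aint)stride[1] * esize,
                            MPI_BYTE, &inner);
    /* outer dimension: one inner pattern per selected row, row pitch in bytes */
    MPI_Type_create_hvector(count[0], 1, (MPI_Aint)stride[0] * shape[1] * esize,
                            inner, &outer);
    MPI_Type_commit(&outer);
    MPI_Type_free(&inner);

    /* byte offset of the first selected element, accumulated as in the loop above */
    *offset = ((MPI_Aint)start[0] * shape[1] + start[1]) * esize;
    return outer;
}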
/*----< ncmpii_vara_create_filetype() >--------------------------------------*/
static int
ncmpii_vara_create_filetype(NC               *ncp,
                            NC_var           *varp,
                            const MPI_Offset *start,
                            const MPI_Offset *count,
                            int               rw_flag,
                            MPI_Offset       *offset_ptr,
                            MPI_Datatype     *filetype_ptr)
{
    int          dim, status;
    MPI_Offset   offset, nelems=1;
    MPI_Datatype filetype;

    offset   = varp->begin;
    filetype = MPI_BYTE;

    /* New coordinate/edge check to fix NC_EINVALCOORDS bug */
    status = NCedgeck(ncp, varp, start, count);
    if (status != NC_NOERR ||
        (rw_flag == READ_REQ && IS_RECVAR(varp) &&
         *start + *count > NC_get_numrecs(ncp)))
    {
        status = NCcoordck(ncp, varp, start);
        if (status != NC_NOERR)
            return status;
        else
            return NC_EEDGE;
    }

    /* check if the request is contiguous in file
       if yes, there is no need to create a filetype */
    if (ncmpii_is_request_contiguous(varp, start, count)) {
        status = ncmpii_get_offset(ncp, varp, start, NULL, NULL, &offset);
        *offset_ptr   = offset;
        *filetype_ptr = filetype;
        return status;
    }

    for (dim=0; dim<varp->ndims; dim++) nelems *= count[dim];

    /* filetype is defined only when varp is not a scalar and the number of
       requested elements > 0
       (varp->ndims == 0 means this is a scalar variable)
       Otherwise, keep filetype MPI_BYTE */
    if (varp->ndims > 0 && nelems > 0) {
        int i, ndims, blklens[3], tag=0;
        int *shape=NULL, *subcount=NULL, *substart=NULL; /* all in bytes */
        MPI_Offset *shape64=NULL, *subcount64=NULL, *substart64=NULL;
        MPI_Offset size, disps[3];
        MPI_Datatype rectype, types[3], type1;

        ndims    = varp->ndims;
        shape    = (int*) NCI_Malloc(3 * ndims * sizeof(int));
        subcount = shape    + ndims;
        substart = subcount + ndims;

        /* here, the request size has been checked and it must be > 0 */
        if (IS_RECVAR(varp)) {
            subcount[0] = count[0];
            substart[0] = 0;
            shape[0]    = subcount[0];

            if (ncp->recsize <= varp->len) {
                /* the only record variable */
                if (varp->ndims == 1) {
                    shape[0]    *= varp->xsz;
                    subcount[0] *= varp->xsz;
                }
                else {
                    for (dim = 1; dim < ndims-1; dim++) {
                        shape[dim]    = varp->shape[dim];
                        subcount[dim] = count[dim];
                        substart[dim] = start[dim];
                    }
                    shape[dim]    = varp->xsz * varp->shape[dim];
                    subcount[dim] = varp->xsz * count[dim];
                    substart[dim] = varp->xsz * start[dim];
                }
                offset += start[0] * ncp->recsize;

                MPI_Type_create_subarray(ndims, shape, subcount, substart,
                                         MPI_ORDER_C, MPI_BYTE, &filetype);
                MPI_Type_commit(&filetype);
            }
            else {
                check_recsize_too_big(ncp);
                /* more than one record variable */
                offset += start[0] * ncp->recsize;
                if (varp->ndims == 1) {
#if (MPI_VERSION < 2)
                    MPI_Type_hvector(subcount[0], varp->xsz, ncp->recsize,
                                     MPI_BYTE, &filetype);
#else
                    MPI_Type_create_hvector(subcount[0], varp->xsz, ncp->recsize,
                                            MPI_BYTE, &filetype);
#endif
                    MPI_Type_commit(&filetype);
                }
                else {
                    for (dim = 1; dim < ndims-1; dim++) {
                        shape[dim]    = varp->shape[dim];
                        subcount[dim] = count[dim];
                        substart[dim] = start[dim];
                    }
                    shape[dim]    = varp->xsz * varp->shape[dim];
                    subcount[dim] = varp->xsz * count[dim];
                    substart[dim] = varp->xsz * start[dim];

                    MPI_Type_create_subarray(ndims-1, shape+1, subcount+1, substart+1,
                                             MPI_ORDER_C, MPI_BYTE, &rectype);
                    MPI_Type_commit(&rectype);
#if (MPI_VERSION < 2)
                    MPI_Type_hvector(subcount[0], 1, ncp->recsize, rectype,
                                     &filetype);
#else
                    MPI_Type_create_hvector(subcount[0], 1, ncp->recsize, rectype,
                                            &filetype);
#endif
                    MPI_Type_commit(&filetype);
                    MPI_Type_free(&rectype);
                }
            }
        }
        else { /* non record variable */
            tag = 0;
            for (dim=0; dim<ndims-1; dim++) {
                if (varp->shape[dim] > 2147483647) { /* if shape > 2^31-1 */
                    tag = 1;
                    break;
                }
            }
            if ((varp->shape[dim]*varp->xsz) > 2147483647)
                tag = 1;

            if (tag == 0) {
                for (dim = 0; dim < ndims-1; dim++) {
                    shape[dim]    = varp->shape[dim];
                    subcount[dim] = count[dim];
                    substart[dim] = start[dim];
                }
                shape[dim]    = varp->xsz * varp->shape[dim];
                subcount[dim] = varp->xsz * count[dim];
                substart[dim] = varp->xsz * start[dim];

                MPI_Type_create_subarray(ndims, shape, subcount, substart,
                                         MPI_ORDER_C, MPI_BYTE, &filetype);
                MPI_Type_commit(&filetype);
            }
            else {
                shape64 = (MPI_Offset*) NCI_Malloc(3 * ndims * sizeof(MPI_Offset));
                subcount64 = shape64    + ndims;
                substart64 = subcount64 + ndims;

                if (ndims == 1) {  // for 64-bit support, added July 23, 2008
                    shape64[0]    = varp->shape[0];
                    subcount64[0] = count[0];
                    substart64[0] = start[0];

                    offset += start[0]*varp->xsz;

                    MPI_Type_contiguous(subcount64[0]*varp->xsz, MPI_BYTE, &type1);
                    MPI_Type_commit(&type1);
#if (MPI_VERSION < 2)
                    MPI_Type_hvector(subcount64[0], varp->xsz,
                                     shape64[0]*varp->xsz, MPI_BYTE, &filetype);
#else
                    MPI_Type_create_hvector(1, 1, shape64[0]*varp->xsz, type1,
                                            &filetype);
#endif
                    MPI_Type_commit(&filetype);
                    MPI_Type_free(&type1);
                }
                else {
                    for (dim = 0; dim < ndims-1; dim++) {
                        shape64[dim]    = varp->shape[dim];
                        subcount64[dim] = count[dim];
                        substart64[dim] = start[dim];
                    }
                    shape64[dim]    = varp->xsz * varp->shape[dim];
                    subcount64[dim] = varp->xsz * count[dim];
                    substart64[dim] = varp->xsz * start[dim];

                    MPI_Type_hvector(subcount64[dim-1], subcount64[dim],
                                     varp->xsz * varp->shape[dim],
                                     MPI_BYTE, &type1);
                    MPI_Type_commit(&type1);

                    size = shape[dim];
                    for (i=dim-2; i>=0; i--) {
                        size *= shape[i+1];
                        MPI_Type_hvector(subcount64[i], 1, size, type1, &filetype);
                        MPI_Type_commit(&filetype);

                        MPI_Type_free(&type1);
                        type1 = filetype;
                    }

                    disps[1] = substart64[dim];
                    size = 1;
                    for (i=dim-1; i>=0; i--) {
                        size *= shape64[i+1];
                        disps[1] += size*substart64[i];
                    }
                    disps[2] = 1;
                    for (i=0; i<ndims; i++) disps[2] *= shape64[i];

                    disps[0] = 0;
                    blklens[0] = blklens[1] = blklens[2] = 1;
                    types[0] = MPI_LB;
                    types[1] = type1;
                    types[2] = MPI_UB;

                    MPI_Type_struct(3, blklens, (MPI_Aint*) disps, types, &filetype);
                    MPI_Type_free(&type1);
                }
                NCI_Free(shape64);
            }
        }
        NCI_Free(shape);
    }

    *offset_ptr   = offset;
    *filetype_ptr = filetype;

    return NC_NOERR;
}
FORT_DLL_SPEC void FORT_CALL mpi_type_create_hvector_ ( MPI_Fint *v1, MPI_Fint *v2,
                                                        MPI_Aint *v3, MPI_Fint *v4,
                                                        MPI_Fint *v5, MPI_Fint *ierr )
{
    *ierr = MPI_Type_create_hvector( *v1, *v2, *v3, (MPI_Datatype)(*v4),
                                     (MPI_Datatype *)(v5) );
}