IOR_offset_t
IOR_Xfer_NCMPI(int access, void *fd, IOR_size_t *buffer,
               IOR_offset_t length, IOR_param_t *param)
{
    char *bufferPtr = (char *)buffer;
    static int firstReadCheck = FALSE, startDataSet;
    int var_id, dim_id[NUM_DIMS];
    MPI_Offset bufSize[NUM_DIMS], offset[NUM_DIMS];
    IOR_offset_t segmentPosition;
    int segmentNum, transferNum;

    /* Wei-keng Liao: In IOR.c line 1979 says "block size must be a multiple
       of transfer size."  Hence, length should always == param->transferSize
       below.  I leave it here to double check. */
    if (length != param->transferSize) {
        char errMsg[256];
        sprintf(errMsg, "length(%lld) != param->transferSize(%lld)\n",
                length, param->transferSize);
        NCMPI_CHECK(-1, errMsg);
    }

    /* determine by offset if need to start data set */
    if (param->filePerProc == TRUE) {
        segmentPosition = (IOR_offset_t)0;
    } else {
        segmentPosition =
            (IOR_offset_t)((rank + rankOffset) % param->numTasks)
            * param->blockSize;
    }
    if ((int)(param->offset - segmentPosition) == 0) {
        startDataSet = TRUE;
        /*
         * this toggle is for the read check operation, which passes through
         * this function twice; note that this function will open a data set
         * only on the first read check and close only on the second
         */
        if (access == READCHECK) {
            if (firstReadCheck == TRUE) {
                firstReadCheck = FALSE;
            } else {
                firstReadCheck = TRUE;
            }
        }
    }

    if (startDataSet == TRUE &&
        (access != READCHECK || firstReadCheck == TRUE)) {
        if (access == WRITE) {
            int numTransfers = param->blockSize / param->transferSize;

            /* Wei-keng Liao: change 1D array to 3D array of dimensions:
               [segmentCount*numTasksWorld][numTransfers][transferSize]
               Requirement: none of these dimensions should be > 4G */
            NCMPI_CHECK(ncmpi_def_dim(*(int *)fd, "segments_times_np",
                                      NC_UNLIMITED, &dim_id[0]),
                        "cannot define data set dimensions");
            NCMPI_CHECK(ncmpi_def_dim(*(int *)fd, "number_of_transfers",
                                      numTransfers, &dim_id[1]),
                        "cannot define data set dimensions");
            NCMPI_CHECK(ncmpi_def_dim(*(int *)fd, "transfer_size",
                                      param->transferSize, &dim_id[2]),
                        "cannot define data set dimensions");
            NCMPI_CHECK(ncmpi_def_var(*(int *)fd, "data_var", NC_BYTE,
                                      NUM_DIMS, dim_id, &var_id),
                        "cannot define data set variables");
            NCMPI_CHECK(ncmpi_enddef(*(int *)fd),
                        "cannot close data set define mode");
        } else {
            NCMPI_CHECK(ncmpi_inq_varid(*(int *)fd, "data_var", &var_id),
                        "cannot retrieve data set variable");
        }
        if (param->collective == FALSE) {
            NCMPI_CHECK(ncmpi_begin_indep_data(*(int *)fd),
                        "cannot enable independent data mode");
        }
        param->var_id = var_id;
        startDataSet = FALSE;
    }
    var_id = param->var_id;

    /* Wei-keng Liao: calculate the segment number */
    segmentNum = param->offset / (param->numTasks * param->blockSize);

    /* Wei-keng Liao: calculate the transfer number in each block */
    transferNum = param->offset % param->blockSize / param->transferSize;

    /* Wei-keng Liao: read/write the 3rd dim of the dataset, each of
       amount param->transferSize */
    bufSize[0] = 1;
    bufSize[1] = 1;
    bufSize[2] = param->transferSize;

    offset[0] = segmentNum * numTasksWorld + rank;
    offset[1] = transferNum;
    offset[2] = 0;

    /* access the file */
    if (access == WRITE) { /* WRITE */
        if (param->collective) {
            NCMPI_CHECK(ncmpi_put_vara_all(*(int *)fd, var_id, offset, bufSize,
                                           bufferPtr, length, MPI_BYTE),
                        "cannot write to data set");
        } else {
            NCMPI_CHECK(ncmpi_put_vara(*(int *)fd, var_id, offset, bufSize,
                                       bufferPtr, length, MPI_BYTE),
                        "cannot write to data set");
        }
    } else { /* READ or CHECK */
        if (param->collective == TRUE) {
            NCMPI_CHECK(ncmpi_get_vara_all(*(int *)fd, var_id, offset, bufSize,
                                           bufferPtr, length, MPI_BYTE),
                        "cannot read from data set");
        } else {
            NCMPI_CHECK(ncmpi_get_vara(*(int *)fd, var_id, offset, bufSize,
                                       bufferPtr, length, MPI_BYTE),
                        "cannot read from data set");
        }
    }
    return (length);
} /* IOR_Xfer_NCMPI() */
/*
 * adapted from HydroRunBaseMpi::outputPnetcdf
 *
 * see : test_pnetcdf_write.cpp
 *
 * Note that if ghostIncluded is false, localData must be sized nx,ny,nz;
 * otherwise its size must be nx+2*ghostWidth, ny+2*ghostWidth, nz+2*ghostWidth.
 */
void write_pnetcdf(const std::string &filename,
                   HostArray<double> &localData,
                   ConfigMap         &configMap)
{
    int myRank;
    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

    // read local domain sizes
    int nx = configMap.getInteger("mesh", "nx", 32);
    int ny = configMap.getInteger("mesh", "ny", 32);
    int nz = configMap.getInteger("mesh", "nz", 32);

    // read MPI geometry
    int mx = configMap.getInteger("mpi", "mx", 1);
    int my = configMap.getInteger("mpi", "my", 1);
    int mz = configMap.getInteger("mpi", "mz", 1);

    // MPI cartesian coordinates
    // myRank = mpiCoord[0] + mx*mpiCoord[1] + mx*my*mpiCoord[2]
    int mpiCoord[3];
    {
        mpiCoord[2] = myRank / (mx * my);
        mpiCoord[1] = (myRank - mx * my * mpiCoord[2]) / mx;
        mpiCoord[0] = myRank - mx * my * mpiCoord[2] - mx * mpiCoord[1];
    }

    bool ghostIncluded = configMap.getBool("output", "ghostIncluded", false);
    int  ghostWidth    = configMap.getInteger("mesh", "ghostWidth", 3);

    // global size
    int NX = nx * mx, NY = ny * my, NZ = nz * mz;
    int gsizes[3];
    gsizes[IZ] = NX;
    gsizes[IY] = NY;
    gsizes[IX] = NZ;

    if (ghostIncluded) {
        gsizes[IZ] += 2 * ghostWidth;
        gsizes[IY] += 2 * ghostWidth;
        gsizes[IX] += 2 * ghostWidth;
    }

    // netcdf file id
    int ncFileId;
    int err;

    // file creation mode
    int ncCreationMode = NC_CLOBBER;
    bool useCDF5 = configMap.getBool("output", "pnetcdf_cdf5", false);
    if (useCDF5)
        ncCreationMode = NC_CLOBBER | NC_64BIT_DATA;
    else // use CDF-2 file format
        ncCreationMode = NC_CLOBBER | NC_64BIT_OFFSET;

    // verbose log ?
    bool pnetcdf_verbose = configMap.getBool("output", "pnetcdf_verbose", false);

    int nbVar = 8;
    int dimIds[3], varIds[nbVar];
    MPI_Offset write_size, sum_write_size;
    MPI_Info mpi_info_used;
    char str[512];

    // time measurement variables
    double write_timing, max_write_timing, write_bw;

    /*
     * writing parameters (offset and size)
     */
    MPI_Offset starts[3] = {0};
    MPI_Offset counts[3] = {nz, ny, nx};

    // take care of row-major / column-major format
    starts[IZ] = mpiCoord[IX] * nx;
    starts[IY] = mpiCoord[IY] * ny;
    starts[IX] = mpiCoord[IZ] * nz;

    if (ghostIncluded) {
        if (mpiCoord[IX] == 0)      counts[IZ] += ghostWidth;
        if (mpiCoord[IY] == 0)      counts[IY] += ghostWidth;
        if (mpiCoord[IZ] == 0)      counts[IX] += ghostWidth;

        if (mpiCoord[IX] == mx - 1) counts[IZ] += ghostWidth;
        if (mpiCoord[IY] == my - 1) counts[IY] += ghostWidth;
        if (mpiCoord[IZ] == mz - 1) counts[IX] += ghostWidth;

        starts[IZ] += ghostWidth;
        starts[IY] += ghostWidth;
        starts[IX] += ghostWidth;

        if (mpiCoord[IX] == 0) starts[IZ] -= ghostWidth;
        if (mpiCoord[IY] == 0) starts[IY] -= ghostWidth;
        if (mpiCoord[IZ] == 0) starts[IX] -= ghostWidth;
    }

    /*
     * Create NetCDF file
     */
    err = ncmpi_create(MPI_COMM_WORLD, filename.c_str(),
                       ncCreationMode, MPI_INFO_NULL, &ncFileId);
    if (err != NC_NOERR) {
        printf("Error: ncmpi_create() file %s (%s)\n",
               filename.c_str(), ncmpi_strerror(err));
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(1);
    }

    /*
     * Define dimensions
     */
    err = ncmpi_def_dim(ncFileId, "x", gsizes[0], &dimIds[0]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_dim(ncFileId, "y", gsizes[1], &dimIds[1]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_dim(ncFileId, "z", gsizes[2], &dimIds[2]);
    PNETCDF_HANDLE_ERROR;

    /*
     * Define variables
     */
    nc_type      ncDataType  = NC_DOUBLE;
    MPI_Datatype mpiDataType = MPI_DOUBLE;

    err = ncmpi_def_var(ncFileId, "rho",    ncDataType, 3, dimIds, &varIds[ID]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_var(ncFileId, "E",      ncDataType, 3, dimIds, &varIds[IP]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_var(ncFileId, "rho_vx", ncDataType, 3, dimIds, &varIds[IU]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_var(ncFileId, "rho_vy", ncDataType, 3, dimIds, &varIds[IV]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_var(ncFileId, "rho_vz", ncDataType, 3, dimIds, &varIds[IW]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_var(ncFileId, "Bx",     ncDataType, 3, dimIds, &varIds[IA]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_var(ncFileId, "By",     ncDataType, 3, dimIds, &varIds[IB]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_var(ncFileId, "Bz",     ncDataType, 3, dimIds, &varIds[IC]);
    PNETCDF_HANDLE_ERROR;

    /*
     * global attributes
     */
    // did we use CDF-2 or CDF-5 ?
    {
        int useCDF5_int = useCDF5 ? 1 : 0;
        err = ncmpi_put_att_int(ncFileId, NC_GLOBAL, "CDF-5 mode",
                                NC_INT, 1, &useCDF5_int);
        PNETCDF_HANDLE_ERROR;
    }

    /*
     * exit define mode
     */
    err = ncmpi_enddef(ncFileId);
    PNETCDF_HANDLE_ERROR;

    /*
     * Get all the MPI-IO hints used
     */
    err = ncmpi_get_file_info(ncFileId, &mpi_info_used);
    PNETCDF_HANDLE_ERROR;

    int nItems = counts[IX] * counts[IY] * counts[IZ];

    for (int iVar = 0; iVar < nbVar; iVar++) {
        double *data = &(localData(0, 0, 0, iVar));
        err = ncmpi_put_vara_all(ncFileId, varIds[iVar], starts, counts,
                                 data, nItems, mpiDataType);
        PNETCDF_HANDLE_ERROR;
    }

    /*
     * close the file
     */
    err = ncmpi_close(ncFileId);
    PNETCDF_HANDLE_ERROR;
} // write_pnetcdf
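/*
 * PNETCDF_HANDLE_ERROR is not defined in this listing.  A minimal sketch,
 * assuming the convention used above (an `err` variable in scope, checked
 * right after each PnetCDF call); the exact body is an assumption:
 */
#define PNETCDF_HANDLE_ERROR do {                                    \
    if (err != NC_NOERR) {                                           \
        fprintf(stderr, "PnetCDF error at line %d of %s: %s\n",      \
                __LINE__, __FILE__, ncmpi_strerror(err));            \
        MPI_Abort(MPI_COMM_WORLD, -1);                               \
    }                                                                \
} while (0)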
/**
 * Write a parallel NetCDF file.
 *
 * We assume here that localData is a scalar field.
 *
 * PnetCDF uses row-major format (same as FFTW).
 *
 * \param[in] filename  : PnetCDF filename
 * \param[in] starts    : offset where writing starts
 * \param[in] counts    : number of elements written (3D sub-domain inside global)
 * \param[in] gsizes    : global sizes
 * \param[in] localData : actual data buffer (size : nx*ny*nz*sizeof(float))
 */
void write_pnetcdf(const std::string &filename,
                   MPI_Offset starts[3],
                   MPI_Offset counts[3],
                   int        gsizes[3],
                   float     *localData)
{
    int myRank;
    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

    // netcdf file id
    int ncFileId;
    int err;

    // file creation mode
    int ncCreationMode = NC_CLOBBER;

    // CDF-5 is almost mandatory for very large files (>= 2x10^9 cells);
    // not useful here
    bool useCDF5 = false;
    if (useCDF5)
        ncCreationMode = NC_CLOBBER | NC_64BIT_DATA;
    else // use CDF-2 file format
        ncCreationMode = NC_CLOBBER | NC_64BIT_OFFSET;

    // verbose log ?
    //bool pnetcdf_verbose = false;

    int nbVar = 1;
    int dimIds[3], varIds[nbVar];
    //MPI_Offset write_size, sum_write_size;
    MPI_Info mpi_info_used;
    //char str[512];

    // time measurement variables
    //float write_timing, max_write_timing, write_bw;

    /*
     * Create NetCDF file
     */
    err = ncmpi_create(MPI_COMM_WORLD, filename.c_str(),
                       ncCreationMode, MPI_INFO_NULL, &ncFileId);
    if (err != NC_NOERR) {
        printf("Error: ncmpi_create() file %s (%s)\n",
               filename.c_str(), ncmpi_strerror(err));
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(1);
    }

    /*
     * Define global dimensions
     */
    err = ncmpi_def_dim(ncFileId, "x", gsizes[0], &dimIds[0]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_dim(ncFileId, "y", gsizes[1], &dimIds[1]);
    PNETCDF_HANDLE_ERROR;
    err = ncmpi_def_dim(ncFileId, "z", gsizes[2], &dimIds[2]);
    PNETCDF_HANDLE_ERROR;

    /*
     * Define the variable to write (give it a name)
     */
    nc_type      ncDataType  = NC_FLOAT;
    MPI_Datatype mpiDataType = MPI_FLOAT;

    err = ncmpi_def_var(ncFileId, "data", ncDataType, 3, dimIds, &varIds[0]);
    PNETCDF_HANDLE_ERROR;

    /*
     * global attributes
     */
    // did we use CDF-2 or CDF-5 ?
    {
        int useCDF5_int = useCDF5 ? 1 : 0;
        err = ncmpi_put_att_int(ncFileId, NC_GLOBAL, "CDF-5 mode",
                                NC_INT, 1, &useCDF5_int);
        PNETCDF_HANDLE_ERROR;
    }

    /*
     * exit define mode
     */
    err = ncmpi_enddef(ncFileId);
    PNETCDF_HANDLE_ERROR;

    /*
     * Get all the MPI-IO hints used
     */
    err = ncmpi_get_file_info(ncFileId, &mpi_info_used);
    PNETCDF_HANDLE_ERROR;

    // number of elements to write from the local buffer
    int nItems = counts[IX] * counts[IY] * counts[IZ];

    {
        // debug
        // printf("Pnetcdf [rank=%d] starts=%lld %lld %lld, counts=%lld %lld %lld, gsizes=%d %d %d\n",
        //        myRank,
        //        starts[0], starts[1], starts[2],
        //        counts[0], counts[1], counts[2],
        //        gsizes[0], gsizes[1], gsizes[2]);

        /*
         * Make sure PnetCDF doesn't complain when starts is outside the
         * global domain bounds.  When nItems is zero we of course write
         * nothing, but the starts offset still has to lie inside the global
         * domain, so there is no harm in resetting starts to the origin.
         */
        if (nItems == 0) {
            starts[0] = 0;
            starts[1] = 0;
            starts[2] = 0;
        }

        err = ncmpi_put_vara_all(ncFileId, varIds[0], starts, counts,
                                 localData, nItems, mpiDataType);
        PNETCDF_HANDLE_ERROR;
    }

    /*
     * close the file
     */
    err = ncmpi_close(ncFileId);
    PNETCDF_HANDLE_ERROR;
} // write_pnetcdf
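/*
 * A hypothetical caller for the float variant above, showing how starts,
 * counts and gsizes fit together: each rank writes one slab of a 64^3
 * global array along the slowest-varying dimension.  The function name,
 * domain size and file name are made up for the example, and nbRanks is
 * assumed to divide the slab dimension evenly.  Requires <vector>.
 */
void write_slab_example(int myRank, int nbRanks)
{
    int gsizes[3] = {64, 64, 64};             // global array (dims 0,1,2)
    MPI_Offset nzLocal = gsizes[0] / nbRanks; // slab thickness per rank

    MPI_Offset starts[3] = {myRank * nzLocal, 0, 0};
    MPI_Offset counts[3] = {nzLocal, gsizes[1], gsizes[2]};

    // fill the local buffer with this rank's id so slabs are identifiable
    std::vector<float> localData(nzLocal * gsizes[1] * gsizes[2],
                                 (float)myRank);

    write_pnetcdf("slab_example.nc", starts, counts, gsizes,
                  localData.data());
}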
int main(int argc, char **argv)
{
    int ret, ncfile, nprocs, rank, dimid, varid1, varid2, ndims = 1;
    MPI_Offset start, count = 1;
    char buf[13] = "Hello World\n";
    int data;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (argc != 2) {
        if (rank == 0) printf("Usage: %s filename\n", argv[0]);
        MPI_Finalize();
        exit(-1);
    }

    ret = ncmpi_create(MPI_COMM_WORLD, argv[1],
                       NC_CLOBBER | NC_64BIT_OFFSET, MPI_INFO_NULL, &ncfile);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    ret = ncmpi_def_dim(ncfile, "d1", nprocs, &dimid);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    ret = ncmpi_def_var(ncfile, "v1", NC_INT, ndims, &dimid, &varid1);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    ret = ncmpi_def_var(ncfile, "v2", NC_INT, ndims, &dimid, &varid2);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    ret = ncmpi_put_att_text(ncfile, NC_GLOBAL, "string", 13, buf);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    /* All processes define the dimensions, attributes, and variables, but
     * ncmpi_enddef is the one place where metadata I/O actually happens.
     * Behind the scenes, rank 0 takes the information and writes the netcdf
     * header.  All processes communicate to ensure they have the same
     * (cached) view of the dataset. */
    ret = ncmpi_enddef(ncfile);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    start = rank, count = 1, data = rank;

    /* in this simple example every process writes its rank to two 1-D
     * variables */
    /* we pass a basic MPI_INT type to this flexible-mode call, but could
     * have used any derived MPI datatype that describes application data
     * structures */
    ret = ncmpi_put_vara_all(ncfile, varid1, &start, &count, &data,
                             count, MPI_INT);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    ret = ncmpi_put_vara_all(ncfile, varid2, &start, &count, &data,
                             count, MPI_INT);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    ret = ncmpi_close(ncfile);
    if (ret != NC_NOERR) handle_error(ret, __LINE__);

    MPI_Finalize();
    return 0;
}
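/*
 * handle_error is not shown in the listing.  A minimal sketch matching its
 * usage above (report the failing source line, then abort); place it before
 * main.  The exact body is an assumption:
 */
static void handle_error(int status, int lineno)
{
    fprintf(stderr, "Error at line %d: %s\n", lineno, ncmpi_strerror(status));
    MPI_Abort(MPI_COMM_WORLD, 1);
}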