/* write out variable's data from in-memory structure */ void load_netcdf(void *rec_start) { int i, idim; int stat = NC_NOERR; MPI_Offset *start, *count; char *charvalp = NULL; short *shortvalp = NULL; int *intvalp = NULL; float *floatvalp = NULL; double *doublevalp = NULL; unsigned char *ubytevalp = NULL; unsigned short *ushortvalp = NULL; unsigned int *uintvalp = NULL; long long *int64valp = NULL; unsigned long long *uint64valp = NULL; MPI_Offset total_size; /* load values into variable */ switch (vars[varnum].type) { case NC_CHAR: case NC_BYTE: charvalp = (char *) rec_start; break; case NC_SHORT: shortvalp = (short *) rec_start; break; case NC_INT: intvalp = (int *) rec_start; break; case NC_FLOAT: floatvalp = (float *) rec_start; break; case NC_DOUBLE: doublevalp = (double *) rec_start; break; case NC_UBYTE: ubytevalp = (unsigned char *) rec_start; break; case NC_USHORT: ushortvalp = (unsigned short *) rec_start; break; case NC_UINT: uintvalp = (unsigned int *) rec_start; break; case NC_INT64: int64valp = (long long *) rec_start; break; case NC_UINT64: uint64valp = (unsigned long long *) rec_start; break; default: derror("Unhandled type %d\n", vars[varnum].type); break; } start = (MPI_Offset*) malloc(vars[varnum].ndims * 2 * sizeof(MPI_Offset)); count = start + vars[varnum].ndims; if (vars[varnum].ndims > 0) { /* initialize start to upper left corner (0,0,0,...) */ start[0] = 0; if (vars[varnum].dims[0] == rec_dim) { count[0] = vars[varnum].nrecs; } else { count[0] = dims[vars[varnum].dims[0]].size; } } for (idim = 1; idim < vars[varnum].ndims; idim++) { start[idim] = 0; count[idim] = dims[vars[varnum].dims[idim]].size; } total_size = nctypesize(vars[varnum].type); for (idim=0; idim<vars[varnum].ndims; idim++) total_size *= count[idim]; /* If the total put size is more than 2GB, then put one subarray at a time. * Here the subarray is from 1, 2, ... ndims, except 0. * This is not a perfect solution. To be improved. 
*/ if (total_size > INT_MAX) { MPI_Offset nchunks=count[0]; MPI_Offset subarray_nelems=1; for (idim=1; idim<vars[varnum].ndims; idim++) subarray_nelems *= count[idim]; count[0] = 1; switch (vars[varnum].type) { case NC_BYTE: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_schar_all(ncid, varnum, start, count, (signed char *)charvalp); check_err(stat, "ncmpi_put_vara_schar_all", __func__, __LINE__, __FILE__); charvalp += subarray_nelems; } break; case NC_CHAR: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_text_all(ncid, varnum, start, count, charvalp); check_err(stat, "ncmpi_put_vara_text_all", __func__, __LINE__, __FILE__); charvalp += subarray_nelems; } break; case NC_SHORT: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_short_all(ncid, varnum, start, count, shortvalp); check_err(stat, "ncmpi_put_vara_short_all", __func__, __LINE__, __FILE__); shortvalp += subarray_nelems; } break; case NC_INT: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_int_all(ncid, varnum, start, count, intvalp); check_err(stat, "ncmpi_put_vara_int_all", __func__, __LINE__, __FILE__); intvalp += subarray_nelems; } break; case NC_FLOAT: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_float_all(ncid, varnum, start, count, floatvalp); check_err(stat, "ncmpi_put_vara_float_all", __func__, __LINE__, __FILE__); floatvalp += subarray_nelems; } break; case NC_DOUBLE: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_double_all(ncid, varnum, start, count, doublevalp); check_err(stat, "ncmpi_put_vara_double_all", __func__, __LINE__, __FILE__); doublevalp += subarray_nelems; } break; case NC_UBYTE: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_uchar_all(ncid, varnum, start, count, ubytevalp); check_err(stat, "ncmpi_put_vara_uchar_all", __func__, __LINE__, __FILE__); ubytevalp += subarray_nelems; } break; case NC_USHORT: for (i=0; i<nchunks; i++) { start[0] = i; stat = 
ncmpi_put_vara_ushort_all(ncid, varnum, start, count, ushortvalp); check_err(stat, "ncmpi_put_vara_ushort_all", __func__, __LINE__, __FILE__); ushortvalp += subarray_nelems; } break; case NC_UINT: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_uint_all(ncid, varnum, start, count, uintvalp); check_err(stat, "ncmpi_put_vara_uint_all", __func__, __LINE__, __FILE__); uintvalp += subarray_nelems; } break; case NC_INT64: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_longlong_all(ncid, varnum, start, count, int64valp); check_err(stat, "ncmpi_put_vara_longlong_all", __func__, __LINE__, __FILE__); int64valp += subarray_nelems; } break; case NC_UINT64: for (i=0; i<nchunks; i++) { start[0] = i; stat = ncmpi_put_vara_ulonglong_all(ncid, varnum, start, count, uint64valp); check_err(stat, "ncmpi_put_vara_ulonglong_all", __func__, __LINE__, __FILE__); uint64valp += subarray_nelems; } break; default: derror("Unhandled type %d\n", vars[varnum].type); break; } } else { switch (vars[varnum].type) { case NC_BYTE: stat = ncmpi_put_vara_schar_all(ncid, varnum, start, count, (signed char *)charvalp); check_err(stat, "ncmpi_put_vara_schar_all", __func__, __LINE__, __FILE__); break; case NC_CHAR: stat = ncmpi_put_vara_text_all(ncid, varnum, start, count, charvalp); check_err(stat, "ncmpi_put_vara_text_all", __func__, __LINE__, __FILE__); break; case NC_SHORT: stat = ncmpi_put_vara_short_all(ncid, varnum, start, count, shortvalp); check_err(stat, "ncmpi_put_vara_short_all", __func__, __LINE__, __FILE__); break; case NC_INT: stat = ncmpi_put_vara_int_all(ncid, varnum, start, count, intvalp); check_err(stat, "ncmpi_put_vara_int_all", __func__, __LINE__, __FILE__); break; case NC_FLOAT: stat = ncmpi_put_vara_float_all(ncid, varnum, start, count, floatvalp); check_err(stat, "ncmpi_put_vara_float_all", __func__, __LINE__, __FILE__); break; case NC_DOUBLE: stat = ncmpi_put_vara_double_all(ncid, varnum, start, count, doublevalp); check_err(stat, 
"ncmpi_put_vara_double_all", __func__, __LINE__, __FILE__); break; case NC_UBYTE: stat = ncmpi_put_vara_uchar_all(ncid, varnum, start, count, ubytevalp); check_err(stat, "ncmpi_put_vara_uchar_all", __func__, __LINE__, __FILE__); break; case NC_USHORT: stat = ncmpi_put_vara_ushort_all(ncid, varnum, start, count, ushortvalp); check_err(stat, "ncmpi_put_vara_ushort_all", __func__, __LINE__, __FILE__); break; case NC_UINT: stat = ncmpi_put_vara_uint_all(ncid, varnum, start, count, uintvalp); check_err(stat, "ncmpi_put_vara_uint_all", __func__, __LINE__, __FILE__); break; case NC_INT64: stat = ncmpi_put_vara_longlong_all(ncid, varnum, start, count, int64valp); check_err(stat, "ncmpi_put_vara_longlong_all", __func__, __LINE__, __FILE__); break; case NC_UINT64: stat = ncmpi_put_vara_ulonglong_all(ncid, varnum, start, count, uint64valp); check_err(stat, "ncmpi_put_vara_ulonglong_all", __func__, __LINE__, __FILE__); break; default: derror("Unhandled type %d\n", vars[varnum].type); break; } } free(start); }
/*
 * Writes output in pnetcdf format.
 *
 * nblocks:  local number of blocks
 * vblocks:  pointer to array of vblocks
 * out_file: output file name
 * comm:     MPI communicator
 *
 * The body is compiled only when USEPNETCDF is defined; otherwise the
 * function does nothing.
 *
 * Each process sums its per-block quantities, the sums are reduced over
 * comm to size the file's dimensions, and an exclusive prefix sum gives
 * each process the offset at which its first block starts.  The offsets
 * are then advanced block by block while the data is written.
 *
 * Errors are handled by the ERR macro after every PnetCDF call; ERR is
 * defined outside this function (presumably it inspects `err` -- confirm).
 *
 * NOTE(review): every ncmpi_put_vara_*_all call in the block loop is a
 * collective operation, so this function presumably requires the same
 * nblocks on every process in comm -- confirm against the callers.
 */
void pnetcdf_write(int nblocks, struct vblock_t *vblocks,
                   char *out_file, MPI_Comm comm)
{
#ifdef USEPNETCDF
    int err;
    /* varids[10] is never defined or written */
    int ncid, cmode, varids[23], dimids[8], dimids_2D[2];
    int my_rank;
    int i, b;
    MPI_Offset start[2], count[2];
    MPI_Offset proc_quants[NUM_QUANTS]; /* quantities per process */
    MPI_Offset tot_quants[NUM_QUANTS];  /* total quantities all global blocks */
    MPI_Offset block_ofsts[NUM_QUANTS]; /* starting offsets for each block */
    long long ll_ofsts[NUM_QUANTS];     /* block_ofsts as the type the
                                           longlong put calls expect */

    /* init */
    for (i = 0; i < NUM_QUANTS; i++) {
        proc_quants[i] = 0;
        tot_quants[i] = 0;
        block_ofsts[i] = 0;
    }

    /* sum quantities over local blocks */
    for (b = 0; b < nblocks; b++) {
        proc_quants[NUM_VERTS] += vblocks[b].num_verts;
        proc_quants[NUM_COMP_CELLS] += vblocks[b].num_complete_cells;
        proc_quants[NUM_CELL_FACES] += vblocks[b].tot_num_cell_faces;
        proc_quants[NUM_FACE_VERTS] += vblocks[b].tot_num_face_verts;
        proc_quants[NUM_ORIG_PARTS] += vblocks[b].num_orig_particles;
        proc_quants[NUM_NEIGHBORS] += DIY_Num_neighbors(0, b);
    }
    proc_quants[NUM_BLOCKS] = nblocks;

    /* sum per process values to be global ones */
    MPI_Allreduce(proc_quants, tot_quants, NUM_QUANTS, MPI_OFFSET, MPI_SUM,
                  comm);

    /* prefix sum proc offsets */
    MPI_Exscan(proc_quants, block_ofsts, NUM_QUANTS, MPI_OFFSET, MPI_SUM,
               comm);

    /* MPI leaves the receive buffer of rank 0 undefined after MPI_Exscan,
     * so do not rely on the zeros stored before the call. */
    MPI_Comm_rank(comm, &my_rank);
    if (my_rank == 0) {
        for (i = 0; i < NUM_QUANTS; i++)
            block_ofsts[i] = 0;
    }

    /* create a new file for writing */
    cmode = NC_CLOBBER | NC_64BIT_DATA;
    err = ncmpi_create(comm, out_file, cmode, MPI_INFO_NULL, &ncid); ERR;

    /* define dimensions */
    err = ncmpi_def_dim(ncid, "num_g_blocks", tot_quants[NUM_BLOCKS],
                        &dimids[0]); ERR;
    err = ncmpi_def_dim(ncid, "XYZ", 3, &dimids[1]); ERR;
    err = ncmpi_def_dim(ncid, "num_g_verts", tot_quants[NUM_VERTS],
                        &dimids[2]); ERR;
    err = ncmpi_def_dim(ncid, "num_g_complete_cells",
                        tot_quants[NUM_COMP_CELLS], &dimids[3]); ERR;
    err = ncmpi_def_dim(ncid, "tot_num_g_cell_faces",
                        tot_quants[NUM_CELL_FACES], &dimids[4]); ERR;
    err = ncmpi_def_dim(ncid, "tot_num_g_face_verts",
                        tot_quants[NUM_FACE_VERTS], &dimids[5]); ERR;
    err = ncmpi_def_dim(ncid, "num_g_orig_particles",
                        tot_quants[NUM_ORIG_PARTS], &dimids[6]); ERR;
    err = ncmpi_def_dim(ncid, "num_g_neighbors", tot_quants[NUM_NEIGHBORS],
                        &dimids[7]); ERR;

    /* define variables */
    err = ncmpi_def_var(ncid, "num_verts", NC_INT, 1, &dimids[0],
                        &varids[0]); ERR;
    err = ncmpi_def_var(ncid, "num_complete_cells", NC_INT, 1, &dimids[0],
                        &varids[1]); ERR;
    err = ncmpi_def_var(ncid, "tot_num_cell_faces", NC_INT, 1, &dimids[0],
                        &varids[2]); ERR;
    err = ncmpi_def_var(ncid, "tot_num_face_verts", NC_INT, 1, &dimids[0],
                        &varids[3]); ERR;
    err = ncmpi_def_var(ncid, "num_orig_particles", NC_INT, 1, &dimids[0],
                        &varids[4]); ERR;

    /* block offsets */
    err = ncmpi_def_var(ncid, "block_off_num_verts", NC_INT64, 1,
                        &dimids[0], &varids[5]); ERR;
    err = ncmpi_def_var(ncid, "block_off_num_complete_cells", NC_INT64, 1,
                        &dimids[0], &varids[6]); ERR;
    err = ncmpi_def_var(ncid, "block_off_tot_num_cell_faces", NC_INT64, 1,
                        &dimids[0], &varids[7]); ERR;
    err = ncmpi_def_var(ncid, "block_off_tot_num_face_verts", NC_INT64, 1,
                        &dimids[0], &varids[8]); ERR;
    err = ncmpi_def_var(ncid, "block_off_num_orig_particles", NC_INT64, 1,
                        &dimids[0], &varids[9]); ERR;

    dimids_2D[0] = dimids[0];
    dimids_2D[1] = dimids[1];
    err = ncmpi_def_var(ncid, "mins", NC_FLOAT, 2, dimids_2D,
                        &varids[11]); ERR;
    err = ncmpi_def_var(ncid, "maxs", NC_FLOAT, 2, dimids_2D,
                        &varids[12]); ERR;

    dimids_2D[0] = dimids[2];
    dimids_2D[1] = dimids[1];
    err = ncmpi_def_var(ncid, "save_verts", NC_FLOAT, 2, dimids_2D,
                        &varids[13]); ERR;

    dimids_2D[0] = dimids[6];
    dimids_2D[1] = dimids[1];
    err = ncmpi_def_var(ncid, "sites", NC_FLOAT, 2, dimids_2D,
                        &varids[14]); ERR;

    err = ncmpi_def_var(ncid, "complete_cells", NC_INT, 1, &dimids[3],
                        &varids[15]); ERR;
    err = ncmpi_def_var(ncid, "areas", NC_FLOAT, 1, &dimids[3],
                        &varids[16]); ERR;
    err = ncmpi_def_var(ncid, "vols", NC_FLOAT, 1, &dimids[3],
                        &varids[17]); ERR;
    err = ncmpi_def_var(ncid, "num_cell_faces", NC_INT, 1, &dimids[3],
                        &varids[18]); ERR;
    err = ncmpi_def_var(ncid, "num_face_verts", NC_INT, 1, &dimids[4],
                        &varids[19]); ERR;
    err = ncmpi_def_var(ncid, "face_verts", NC_INT, 1, &dimids[5],
                        &varids[20]); ERR;
    err = ncmpi_def_var(ncid, "neighbors", NC_INT, 1, &dimids[7],
                        &varids[21]); ERR;
    err = ncmpi_def_var(ncid, "g_block_ids", NC_INT, 1, &dimids[0],
                        &varids[22]); ERR;

    /* exit define mode */
    err = ncmpi_enddef(ncid); ERR;

    /* write all variables.
       to improve: we can try nonblocking I/O to aggregate small requests */
    for (b = 0; b < nblocks; b++) {

        struct vblock_t *v = &vblocks[b];

        /* The longlong put calls take `long long *`; MPI_Offset is not
         * guaranteed to be that exact type, so stage the current offsets. */
        for (i = 0; i < NUM_QUANTS; i++)
            ll_ofsts[i] = (long long) block_ofsts[i];

        /* quantities */
        start[0] = block_ofsts[NUM_BLOCKS];
        count[0] = 1;
        err = ncmpi_put_vara_int_all(ncid, varids[0], start, count,
                                     &v->num_verts); ERR;
        err = ncmpi_put_vara_int_all(ncid, varids[1], start, count,
                                     &v->num_complete_cells); ERR;
        err = ncmpi_put_vara_int_all(ncid, varids[2], start, count,
                                     &v->tot_num_cell_faces); ERR;
        err = ncmpi_put_vara_int_all(ncid, varids[3], start, count,
                                     &v->tot_num_face_verts); ERR;
        err = ncmpi_put_vara_int_all(ncid, varids[4], start, count,
                                     &v->num_orig_particles); ERR;

        /* block offsets */
        err = ncmpi_put_vara_longlong_all(ncid, varids[5], start, count,
                                          &ll_ofsts[NUM_VERTS]); ERR;
        err = ncmpi_put_vara_longlong_all(ncid, varids[6], start, count,
                                          &ll_ofsts[NUM_COMP_CELLS]); ERR;
        err = ncmpi_put_vara_longlong_all(ncid, varids[7], start, count,
                                          &ll_ofsts[NUM_CELL_FACES]); ERR;
        err = ncmpi_put_vara_longlong_all(ncid, varids[8], start, count,
                                          &ll_ofsts[NUM_FACE_VERTS]); ERR;
        err = ncmpi_put_vara_longlong_all(ncid, varids[9], start, count,
                                          &ll_ofsts[NUM_ORIG_PARTS]); ERR;

        /* block bounds */
        start[0] = block_ofsts[NUM_BLOCKS];
        count[0] = 1;
        start[1] = 0;
        count[1] = 3;
        err = ncmpi_put_vara_float_all(ncid, varids[11], start, count,
                                       v->mins); ERR;
        err = ncmpi_put_vara_float_all(ncid, varids[12], start, count,
                                       v->maxs); ERR;

        /* save_verts */
        start[0] = block_ofsts[NUM_VERTS];
        start[1] = 0;
        count[0] = v->num_verts;
        count[1] = 3;
        err = ncmpi_put_vara_float_all(ncid, varids[13], start, count,
                                       v->save_verts); ERR;

        /* sites */
        start[0] = block_ofsts[NUM_ORIG_PARTS];
        start[1] = 0;
        count[0] = v->num_orig_particles;
        count[1] = 3;
        err = ncmpi_put_vara_float_all(ncid, varids[14], start, count,
                                       v->sites); ERR;

        /* complete cells */
        start[0] = block_ofsts[NUM_COMP_CELLS];
        count[0] = v->num_complete_cells;
        err = ncmpi_put_vara_int_all(ncid, varids[15], start, count,
                                     v->complete_cells); ERR;

        /* areas */
        start[0] = block_ofsts[NUM_COMP_CELLS];
        count[0] = v->num_complete_cells;
        err = ncmpi_put_vara_float_all(ncid, varids[16], start, count,
                                       v->areas); ERR;

        /* volumes */
        start[0] = block_ofsts[NUM_COMP_CELLS];
        count[0] = v->num_complete_cells;
        err = ncmpi_put_vara_float_all(ncid, varids[17], start, count,
                                       v->vols); ERR;

        /* num_cell_faces */
        start[0] = block_ofsts[NUM_COMP_CELLS];
        count[0] = v->num_complete_cells;
        err = ncmpi_put_vara_int_all(ncid, varids[18], start, count,
                                     v->num_cell_faces); ERR;

        /* num_face_verts */
        start[0] = block_ofsts[NUM_CELL_FACES];
        count[0] = v->tot_num_cell_faces;
        err = ncmpi_put_vara_int_all(ncid, varids[19], start, count,
                                     v->num_face_verts); ERR;

        /* face verts */
        start[0] = block_ofsts[NUM_FACE_VERTS];
        count[0] = v->tot_num_face_verts;
        err = ncmpi_put_vara_int_all(ncid, varids[20], start, count,
                                     v->face_verts); ERR;

        /* neighbors: pad an empty request to one int so that a NULL result
         * always means a real allocation failure */
        size_t nbr_bytes = DIY_Num_neighbors(0, b) * sizeof(int);
        int *neighbors = (int *) malloc(nbr_bytes > 0 ? nbr_bytes
                                                      : sizeof(int));
        if (neighbors == NULL) {
            /* one process cannot simply skip the collective writes below,
             * so give up on the whole job */
            MPI_Abort(comm, 1);
        }
        int num_neighbors = DIY_Get_neighbors(0, b, neighbors);
        start[0] = block_ofsts[NUM_NEIGHBORS];
        count[0] = num_neighbors;
        err = ncmpi_put_vara_int_all(ncid, varids[21], start, count,
                                     neighbors); ERR;
        free(neighbors); /* was leaked once per block */

        /* gids */
        int gid = DIY_Gid(0, b);
        start[0] = block_ofsts[NUM_BLOCKS];
        count[0] = 1;
        err = ncmpi_put_vara_int_all(ncid, varids[22], start, count,
                                     &gid); ERR;

        /* update block offsets */
        block_ofsts[NUM_VERTS] += v->num_verts;
        block_ofsts[NUM_COMP_CELLS] += v->num_complete_cells;
        block_ofsts[NUM_CELL_FACES] += v->tot_num_cell_faces;
        block_ofsts[NUM_FACE_VERTS] += v->tot_num_face_verts;
        block_ofsts[NUM_ORIG_PARTS] += v->num_orig_particles;
        block_ofsts[NUM_NEIGHBORS] += num_neighbors;
        block_ofsts[NUM_BLOCKS]++;

    }

    err = ncmpi_close(ncid); ERR;
#endif
}