//-------------------------------------------------------------------------- // // sends an item to a block (asynchronous) // // did: domain id // lid: local block id // item: item(s) to be sent // count: number of items // datatype: item datatype // dest_gid: destination global block id // // returns: error code // int DIY_Send(int did, int lid, void *item, int count, DIY_Datatype datatype, int dest_gid) { #ifdef _MPI3 int my_gid = DIY_Gid(did, lid); cc->RmaSend(item, count, datatype, my_gid, dest_gid, assign); #else did = did; // quiet compiler warning lid = lid; // ditto cc->Send(item, count, datatype, dest_gid, assign); #endif return 0; }
//-------------------------------------------------------------------------- // // receives an item from a block (asynchronous) // // did: domain id // lid: local block id // items: items to be received (output, array af pointers allocated by caller) // count: number of items received (output) // wait: whether to wait for one or more items to arrive (0 or 1) // datatype: item datatype // src_gids: source global block ids (output, array allocated by caller) // only valid if MPI-3 is used, otherwise filled with -1 values // sizes: size of each item received in datatypes (not bytes) // (output, array allocated by caller) // // returns: error code // int DIY_Recv(int did, int lid, void **items, int *count, int wait, DIY_Datatype datatype, int *src_gids, int *sizes) { int my_gid = DIY_Gid(did, lid); #ifdef _MPI3 *count = cc->RmaRecv(my_gid, items, datatype, src_gids, wait, assign[did], sizes); #else *count = cc->Recv(my_gid, items, datatype, wait, sizes); for (int i = 0; i < *count; i++) src_gids[i] = -1; // only valid for RMA version #endif return 0; }
/*
  writes output in pnetcdf format (collective; all processes in comm
  must call with the same out_file)

  nblocks: local number of blocks
  vblocks: pointer to array of vblocks
  out_file: output file name
  comm: MPI communicator

  fixes vs. prior version:
  - free the per-block neighbors buffer (was leaked every iteration)
  - pass block_ofsts (not &block_ofsts) to MPI_Exscan; the old form was a
    pointer-to-array and compiled only because the parameter is void*
  - re-zero block_ofsts on rank 0 after MPI_Exscan: the MPI standard leaves
    recvbuf undefined on rank 0, so the earlier zero-init is not reliable
  - removed the unused local quants[] array
*/
void pnetcdf_write(int nblocks, struct vblock_t *vblocks,
		   char *out_file, MPI_Comm comm) {

#ifdef USEPNETCDF
  int err;
  int ncid, cmode, varids[23], dimids[8], dimids_2D[2];
  MPI_Offset start[2], count[2];

  MPI_Offset proc_quants[NUM_QUANTS];  /* quantities per process */
  MPI_Offset tot_quants[NUM_QUANTS];   /* total quantities all global blocks */
  MPI_Offset block_ofsts[NUM_QUANTS];  /* starting offsets for each block */

  /* init */
  int i;
  for (i = 0; i < NUM_QUANTS; i++) {
    proc_quants[i] = 0;
    tot_quants[i] = 0;
    block_ofsts[i] = 0;
  }

  int rank;
  MPI_Comm_rank(comm, &rank);

  /* sum quantities over local blocks */
  int b;
  for (b = 0; b < nblocks; b++) {
    proc_quants[NUM_VERTS] += vblocks[b].num_verts;
    proc_quants[NUM_COMP_CELLS] += vblocks[b].num_complete_cells;
    proc_quants[NUM_CELL_FACES] += vblocks[b].tot_num_cell_faces;
    proc_quants[NUM_FACE_VERTS] += vblocks[b].tot_num_face_verts;
    proc_quants[NUM_ORIG_PARTS] += vblocks[b].num_orig_particles;
    proc_quants[NUM_NEIGHBORS] += DIY_Num_neighbors(0, b);
  }
  proc_quants[NUM_BLOCKS] = nblocks;

  /* sum per process values to be global ones */
  MPI_Allreduce(proc_quants, tot_quants, NUM_QUANTS, MPI_OFFSET, MPI_SUM,
		comm);

  /* prefix sum proc offsets */
  MPI_Exscan(proc_quants, block_ofsts, NUM_QUANTS, MPI_OFFSET, MPI_SUM, comm);

  /* MPI_Exscan leaves recvbuf undefined on rank 0; its offsets are zero */
  if (rank == 0) {
    for (i = 0; i < NUM_QUANTS; i++)
      block_ofsts[i] = 0;
  }

  /* create a new file for writing */
  cmode = NC_CLOBBER | NC_64BIT_DATA;
  err = ncmpi_create(comm, out_file, cmode, MPI_INFO_NULL, &ncid); ERR;

  /* define dimensions */
  err = ncmpi_def_dim(ncid, "num_g_blocks", tot_quants[NUM_BLOCKS],
		      &dimids[0]); ERR;
  err = ncmpi_def_dim(ncid, "XYZ", 3, &dimids[1]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_verts", tot_quants[NUM_VERTS],
		      &dimids[2]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_complete_cells", tot_quants[NUM_COMP_CELLS],
		      &dimids[3]); ERR;
  err = ncmpi_def_dim(ncid, "tot_num_g_cell_faces", tot_quants[NUM_CELL_FACES],
		      &dimids[4]); ERR;
  err = ncmpi_def_dim(ncid, "tot_num_g_face_verts", tot_quants[NUM_FACE_VERTS],
		      &dimids[5]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_orig_particles", tot_quants[NUM_ORIG_PARTS],
		      &dimids[6]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_neighbors", tot_quants[NUM_NEIGHBORS],
		      &dimids[7]); ERR;

  /* define variables (varids[10] is intentionally unused, preserving the
     existing file layout) */
  err = ncmpi_def_var(ncid, "num_verts", NC_INT, 1, &dimids[0],
		      &varids[0]); ERR;
  err = ncmpi_def_var(ncid, "num_complete_cells", NC_INT, 1, &dimids[0],
		      &varids[1]); ERR;
  err = ncmpi_def_var(ncid, "tot_num_cell_faces", NC_INT, 1, &dimids[0],
		      &varids[2]); ERR;
  err = ncmpi_def_var(ncid, "tot_num_face_verts", NC_INT, 1, &dimids[0],
		      &varids[3]); ERR;
  err = ncmpi_def_var(ncid, "num_orig_particles", NC_INT, 1, &dimids[0],
		      &varids[4]); ERR;

  /* block offsets */
  err = ncmpi_def_var(ncid, "block_off_num_verts", NC_INT64, 1, &dimids[0],
		      &varids[5]); ERR;
  err = ncmpi_def_var(ncid, "block_off_num_complete_cells", NC_INT64, 1,
		      &dimids[0], &varids[6]); ERR;
  err = ncmpi_def_var(ncid, "block_off_tot_num_cell_faces", NC_INT64, 1,
		      &dimids[0], &varids[7]); ERR;
  err = ncmpi_def_var(ncid, "block_off_tot_num_face_verts", NC_INT64, 1,
		      &dimids[0], &varids[8]); ERR;
  err = ncmpi_def_var(ncid, "block_off_num_orig_particles", NC_INT64, 1,
		      &dimids[0], &varids[9]); ERR;

  dimids_2D[0] = dimids[0];
  dimids_2D[1] = dimids[1];
  err = ncmpi_def_var(ncid, "mins", NC_FLOAT, 2, dimids_2D, &varids[11]); ERR;
  err = ncmpi_def_var(ncid, "maxs", NC_FLOAT, 2, dimids_2D, &varids[12]); ERR;

  dimids_2D[0] = dimids[2];
  dimids_2D[1] = dimids[1];
  err = ncmpi_def_var(ncid, "save_verts", NC_FLOAT, 2, dimids_2D,
		      &varids[13]); ERR;

  dimids_2D[0] = dimids[6];
  dimids_2D[1] = dimids[1];
  err = ncmpi_def_var(ncid, "sites", NC_FLOAT, 2, dimids_2D, &varids[14]); ERR;

  err = ncmpi_def_var(ncid, "complete_cells", NC_INT, 1, &dimids[3],
		      &varids[15]); ERR;
  err = ncmpi_def_var(ncid, "areas", NC_FLOAT, 1, &dimids[3],
		      &varids[16]); ERR;
  err = ncmpi_def_var(ncid, "vols", NC_FLOAT, 1, &dimids[3],
		      &varids[17]); ERR;
  err = ncmpi_def_var(ncid, "num_cell_faces", NC_INT, 1, &dimids[3],
		      &varids[18]); ERR;
  err = ncmpi_def_var(ncid, "num_face_verts", NC_INT, 1, &dimids[4],
		      &varids[19]); ERR;
  err = ncmpi_def_var(ncid, "face_verts", NC_INT, 1, &dimids[5],
		      &varids[20]); ERR;
  err = ncmpi_def_var(ncid, "neighbors", NC_INT, 1, &dimids[7],
		      &varids[21]); ERR;
  err = ncmpi_def_var(ncid, "g_block_ids", NC_INT, 1, &dimids[0],
		      &varids[22]); ERR;

  /* exit define mode */
  err = ncmpi_enddef(ncid); ERR;

  /* write all variables.
     to improve: we can try nonblocking I/O to aggregate small requests */
  for (b = 0; b < nblocks; b++) {

    struct vblock_t *v = &vblocks[b];

    /* quantities */
    start[0] = block_ofsts[NUM_BLOCKS];
    count[0] = 1;
    err = ncmpi_put_vara_int_all(ncid, varids[0], start, count,
				 &v->num_verts); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[1], start, count,
				 &v->num_complete_cells); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[2], start, count,
				 &v->tot_num_cell_faces); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[3], start, count,
				 &v->tot_num_face_verts); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[4], start, count,
				 &v->num_orig_particles); ERR;

    /* block offsets */
    err = ncmpi_put_vara_longlong_all(ncid, varids[5], start, count,
				      &block_ofsts[NUM_VERTS]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[6], start, count,
				      &block_ofsts[NUM_COMP_CELLS]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[7], start, count,
				      &block_ofsts[NUM_CELL_FACES]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[8], start, count,
				      &block_ofsts[NUM_FACE_VERTS]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[9], start, count,
				      &block_ofsts[NUM_ORIG_PARTS]); ERR;

    /* block bounds */
    start[0] = block_ofsts[NUM_BLOCKS];
    count[0] = 1;
    start[1] = 0;
    count[1] = 3;
    err = ncmpi_put_vara_float_all(ncid, varids[11], start, count,
				   v->mins); ERR;
    err = ncmpi_put_vara_float_all(ncid, varids[12], start, count,
				   v->maxs); ERR;

    /* save_verts */
    start[0] = block_ofsts[NUM_VERTS];
    start[1] = 0;
    count[0] = v->num_verts;
    count[1] = 3;
    err = ncmpi_put_vara_float_all(ncid, varids[13], start, count,
				   v->save_verts); ERR;

    /* sites */
    start[0] = block_ofsts[NUM_ORIG_PARTS];
    start[1] = 0;
    count[0] = v->num_orig_particles;
    count[1] = 3;
    err = ncmpi_put_vara_float_all(ncid, varids[14], start, count,
				   v->sites); ERR;

    /* complete cells */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_int_all(ncid, varids[15], start, count,
				 v->complete_cells); ERR;

    /* areas */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_float_all(ncid, varids[16], start, count,
				   v->areas); ERR;

    /* volumes */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_float_all(ncid, varids[17], start, count,
				   v->vols); ERR;

    /* num_cell_faces */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_int_all(ncid, varids[18], start, count,
				 v->num_cell_faces); ERR;

    /* num_face_verts */
    start[0] = block_ofsts[NUM_CELL_FACES];
    count[0] = v->tot_num_cell_faces;
    err = ncmpi_put_vara_int_all(ncid, varids[19], start, count,
				 v->num_face_verts); ERR;

    /* face verts */
    start[0] = block_ofsts[NUM_FACE_VERTS];
    count[0] = v->tot_num_face_verts;
    err = ncmpi_put_vara_int_all(ncid, varids[20], start, count,
				 v->face_verts); ERR;

    /* neighbors */
    int *neighbors = (int*)malloc(DIY_Num_neighbors(0, b) * sizeof(int));
    int num_neighbors = DIY_Get_neighbors(0, b, neighbors);
    start[0] = block_ofsts[NUM_NEIGHBORS];
    count[0] = num_neighbors;
    err = ncmpi_put_vara_int_all(ncid, varids[21], start, count,
				 neighbors); ERR;
    free(neighbors); /* bug fix: was leaked every iteration */

    /* gids */
    int gid = DIY_Gid(0, b);
    start[0] = block_ofsts[NUM_BLOCKS];
    count[0] = 1;
    err = ncmpi_put_vara_int_all(ncid, varids[22], start, count,
				 &gid); ERR;

    /* update block offsets */
    block_ofsts[NUM_VERTS] += v->num_verts;
    block_ofsts[NUM_COMP_CELLS] += v->num_complete_cells;
    block_ofsts[NUM_CELL_FACES] += v->tot_num_cell_faces;
    block_ofsts[NUM_FACE_VERTS] += v->tot_num_face_verts;
    block_ofsts[NUM_ORIG_PARTS] += v->num_orig_particles;
    block_ofsts[NUM_NEIGHBORS] += num_neighbors;
    block_ofsts[NUM_BLOCKS]++;

  }

  err = ncmpi_close(ncid); ERR;
#endif

}
//----------------------------------------------------------------------------
//
// radix-k merge
//
// did: decomposition id
// its: pointers to input/ouput items, results in first number of output items
// hdrs: pointers to input headers (optional, pass NULL if unnecessary)
// nr: number of rounds
// kv: k vector, radix for each round
// cc: pointer to communicate object
// assign: pointer to assignment object
// merge_func: pointer to merging function
// create_func: pointer to function that creates item
// destroy_func: pointer to function that destroys item
// type_func: pointer to function that creates MPI datatype for item
//
// side effects: allocates output items and array of pointers to them, if
// not reducing in-place
//
// returns: number of output items
//
// In each round every live block either sends its item to the round's root
// (and retires) or, as a root, receives from its kv[r]-1 partners and merges
// the group into its own item.  After the final round, the surviving merged
// items are swapped to the front of its[].
//
// NOTE(review): uses C99-style VLAs (partners, recv_its, ...), a compiler
// extension in C++; n_recv may be 0, so zero-length arrays also rely on the
// extension — std::vector would be the portable alternative.
//
int Merge::MergeBlocks(int did, char **its, int **hdrs,
		       int nr, int *kv, Comm *cc, Assignment *assign,
		       void (*merge_func)(char **, int *, int, int *),
		       char * (*create_func)(int *),
		       void (*destroy_func)(void *),
		       void (*type_func)(void*, MPI_Datatype*, int *)) {

  int rank, groupsize; // MPI usual
  int gid; // global id of current item block
  int p; // process rank
  MPI_Datatype dtype; // data type
  int ng; // number of groups this process owns
  int nb = assign->NumBlks(); // number of blocks this process owns
  vector<char *> my_its(its, its + nb); // copy of its
  vector<bool> done(nb, false); // done items
  vector<int> root_gids; // distinct gids of root blocks

  // init
  assert(nr > 0 && nr <= DIY_MAX_R); // sanity
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &groupsize);

  // for all rounds
  for (int r = 0; r < nr; r++){

    int n_recv = 0; // number of received blocks by root
    int partners[kv[r]]; // gids of partners in a group
    root_gids.clear();
    root_gids.reserve(kv[r]);

    // all my current blocks must participate in a round (send or receive)
    for (int b = 0; b < nb; b++) {

      if (!done[b]) { // blocks that survived to this round

	gid = DIY_Gid(did, b);
	// GetPartners fills partners[] and reports whether gid is this
	// group's root; the root's gid is stored at partners[kv[r] - 1]
	bool root = GetPartners(kv, r, gid, partners);

	if (!root) { // nonroots post sends of headers and items

	  p = assign->Gid2Proc(partners[kv[r] - 1]);
	  // let the user build the datatype (and header, if any) for the item
	  if (hdrs)
	    type_func(my_its[b], &dtype, hdrs[b]);
	  else
	    type_func(my_its[b], &dtype, NULL);
	  // tag is source block gid; MPI_BOTTOM is used when the datatype
	  // was built with absolute addresses
	  if (hdrs && dtype_absolute_address)
	    cc->SendItem((char *)MPI_BOTTOM, hdrs[b], p, gid, &dtype);
	  else if (hdrs && !dtype_absolute_address)
	    cc->SendItem((char *)my_its[b], hdrs[b], p, gid, &dtype);
	  else if (!hdrs && dtype_absolute_address)
	    cc->SendItem((char *)MPI_BOTTOM, NULL, p, gid, &dtype);
	  else
	    cc->SendItem((char *)my_its[b], NULL, p, gid, &dtype);
	  MPI_Type_free(&dtype);
	  done[b] = true; // nonroot blocks are done after they have been sent

	}

	else { // root posts receives of headers

	  root_gids.push_back(partners[kv[r] - 1]);
	  for (int k = 0; k < kv[r] - 1; k++) { // receive the others
	    p = assign->Gid2Proc(partners[k]);
	    cc->StartRecvItem(p, hdrs);
	    n_recv++;
	  }

	}

      } // blocks that survived to this round

    } // all my current blocks

    // finish receiving all items
    char *recv_its[n_recv]; // received items
    int recv_gids[n_recv]; // (source) gids of the received items
    int recv_procs[n_recv]; // source proc of each received item
    cc->FinishRecvItemsMerge(recv_its, recv_gids, recv_procs, create_func,
			     type_func);

    // merge each group
    ng = (int)root_gids.size(); // number of groups this process owns
    for (int j = 0; j < ng; j++) {

      vector<char *>reduce_its; // items ready for reduction in a group
      vector<int>reduce_gids; // gids for reduce_its
      reduce_its.reserve(kv[r]);
      reduce_gids.reserve(kv[r]);
      // the root's own (local) item leads the group
      int lid = assign->Gid2Lid(root_gids[j]);
      reduce_its.push_back(my_its[lid]);
      reduce_gids.push_back(root_gids[j]);
      // recompute this group's partner list to match received gids against it
      GetPartners(kv, r, root_gids[j], partners);
      for (int i = 0; i < n_recv; i++) { // collect items for this group
	if (find(partners, partners + kv[r], recv_gids[i])
	    != partners + kv[r]) {
	  reduce_its.push_back(recv_its[i]);
	  reduce_gids.push_back(recv_gids[i]);
	}
      }

      // header from root block of merge is used
      if (hdrs)
	merge_func(&reduce_its[0], &reduce_gids[0], kv[r], hdrs[lid]);
      else
	merge_func(&reduce_its[0], &reduce_gids[0], kv[r], NULL);
      // the merged result replaces the root's item for the next round
      my_its[lid] = reduce_its[0];

    }

    // cleanup: received items are no longer needed once merged
    if (ng) {
      for (int i = 0; i < n_recv; i++)
	destroy_func(recv_its[i]);
    }

  } // for all rounds

  // move results to the front, swapping them rather than copying so that user
  // can free all items without having duplicated pointers that get freed
  // multiple times
  // (ng and root_gids here hold the values from the final round only)
  for (int i = 0; i < ng; i++) {
    char *temp = its[i];
    its[i] = my_its[assign->Gid2Lid(root_gids[i])];
    its[assign->Gid2Lid(root_gids[i])] = temp;
  }

  return ng;

}