//--------------------------------------------------------------------------
//
// sends an item to a block (asynchronous)
//
// did: domain id
// lid: local block id
// item: item(s) to be sent
// count: number of items
// datatype: item datatype
// dest_gid: destination global block id
//
// returns: error code
//
int DIY_Send(int did, int lid, void *item, int count, DIY_Datatype datatype, 
	     int dest_gid) {

#ifdef _MPI3
  int my_gid = DIY_Gid(did, lid);
  cc->RmaSend(item, count, datatype, my_gid, dest_gid, assign);
#else
  did = did; // quiet compiler warning
  lid = lid; // ditto
  cc->Send(item, count, datatype, dest_gid, assign);
#endif    

  return 0;

}
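//--------------------------------------------------------------------------
//
// usage sketch (illustrative, not part of the original source): posts one
// asynchronous send of a single item from a local block; assumes DIY has
// already been initialized and decomposed elsewhere, and that dtype already
// describes the item (its construction with the DIY datatype helpers is
// omitted here)
//
static void example_send(int did, int lid, float *payload, int dest_gid,
                         DIY_Datatype dtype) {

  // one item of dtype; the call returns without waiting and the matching
  // DIY_Recv on the destination block completes the transfer
  DIY_Send(did, lid, payload, 1, dtype, dest_gid);

}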
//--------------------------------------------------------------------------
//
// receives items for a block (asynchronous)
//
// did: domain id
// lid: local block id
// items: items to be received (output, array of pointers allocated by caller)
// count: number of items received (output)
// wait: whether to wait for one or more items to arrive (0 or 1)
// datatype: item datatype
// src_gids: source global block ids (output, array allocated by caller)
//  only valid if MPI-3 is used, otherwise filled with -1 values
// sizes: size of each item received in datatypes (not bytes)
//  (output, array allocated by caller)
//
// returns: error code
//
int DIY_Recv(int did, int lid, void **items, int *count, int wait,
	     DIY_Datatype datatype, int *src_gids, int *sizes) {

  int my_gid = DIY_Gid(did, lid);

#ifdef _MPI3
  *count = cc->RmaRecv(my_gid, items, datatype, src_gids, wait, assign[did], 
		       sizes);
#else
  *count = cc->Recv(my_gid, items, datatype, wait, sizes);
  for (int i = 0; i < *count; i++)
    src_gids[i] = -1; // only valid for RMA version
#endif

  return 0;

}
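//--------------------------------------------------------------------------
//
// usage sketch (illustrative, not part of the original source): receives
// whatever items have arrived for a local block; MAX_ITEMS is a hypothetical
// upper bound chosen by the caller, who must allocate the items, src_gids,
// and sizes arrays as described above
//
static int example_recv(int did, int lid, DIY_Datatype dtype) {

  enum { MAX_ITEMS = 64 };      // assumed bound on items arriving per call
  void *items[MAX_ITEMS];       // filled with pointers to the received items
  int src_gids[MAX_ITEMS];      // -1 unless the MPI-3 (RMA) path is compiled in
  int sizes[MAX_ITEMS];         // per-item sizes in datatypes, not bytes
  int count = 0;

  // wait = 1: block until at least one item has arrived
  DIY_Recv(did, lid, items, &count, 1, dtype, src_gids, sizes);

  return count;                 // number of items actually received

}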
Example 3
/*
  writes output in pnetcdf format

  nblocks: local number of blocks
  vblocks: pointer to array of vblocks
  out_file: output file name
  comm: MPI communicator
*/
void pnetcdf_write(int nblocks, struct vblock_t *vblocks,
       char *out_file, MPI_Comm comm) {

#ifdef USEPNETCDF
  int err;
  int ncid, cmode, varids[23], dimids[8], dimids_2D[2];
  MPI_Offset start[2], count[2];

  MPI_Offset quants[NUM_QUANTS]; /* quantities per block */
  MPI_Offset proc_quants[NUM_QUANTS]; /* quantities per process */
  MPI_Offset tot_quants[NUM_QUANTS]; /* total quantities over all global blocks */
  MPI_Offset block_ofsts[NUM_QUANTS]; /* starting offsets for each block */

  /* init */
  int i;
  for (i = 0; i < NUM_QUANTS; i++) {
    quants[i] = 0;
    proc_quants[i] = 0;
    tot_quants[i] = 0;
    block_ofsts[i] = 0;
  }

  /* sum quantities over local blocks */
  int b;
  for (b = 0; b < nblocks; b++) {
    proc_quants[NUM_VERTS] += vblocks[b].num_verts;
    proc_quants[NUM_COMP_CELLS] += vblocks[b].num_complete_cells;
    proc_quants[NUM_CELL_FACES] += vblocks[b].tot_num_cell_faces;
    proc_quants[NUM_FACE_VERTS] += vblocks[b].tot_num_face_verts;
    proc_quants[NUM_ORIG_PARTS] += vblocks[b].num_orig_particles;
    proc_quants[NUM_NEIGHBORS] += DIY_Num_neighbors(0, b);
  }
  proc_quants[NUM_BLOCKS] = nblocks;

  /* sum per-process values into global totals */
  MPI_Allreduce(proc_quants, tot_quants, NUM_QUANTS, MPI_OFFSET, MPI_SUM, comm);

  /* exclusive prefix sum of per-process quantities gives this process's
     starting offsets */
  MPI_Exscan(proc_quants, block_ofsts, NUM_QUANTS, MPI_OFFSET, MPI_SUM, comm);

  /* create a new file for writing */
  cmode = NC_CLOBBER | NC_64BIT_DATA;
  err = ncmpi_create(comm, out_file, cmode, MPI_INFO_NULL, &ncid); ERR;

  /* define dimensions */
  err = ncmpi_def_dim(ncid, "num_g_blocks", tot_quants[NUM_BLOCKS],
          &dimids[0]); ERR;
  err = ncmpi_def_dim(ncid, "XYZ", 3, &dimids[1]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_verts", tot_quants[NUM_VERTS],
          &dimids[2]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_complete_cells", tot_quants[NUM_COMP_CELLS],
          &dimids[3]); ERR;
  err = ncmpi_def_dim(ncid, "tot_num_g_cell_faces", tot_quants[NUM_CELL_FACES],
          &dimids[4]); ERR;
  err = ncmpi_def_dim(ncid, "tot_num_g_face_verts", tot_quants[NUM_FACE_VERTS],
          &dimids[5]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_orig_particles", tot_quants[NUM_ORIG_PARTS],
          &dimids[6]); ERR;
  err = ncmpi_def_dim(ncid, "num_g_neighbors", tot_quants[NUM_NEIGHBORS],
          &dimids[7]); ERR;

  /* define variables */
  err = ncmpi_def_var(ncid, "num_verts", NC_INT, 1, &dimids[0],
          &varids[0]); ERR;
  err = ncmpi_def_var(ncid, "num_complete_cells", NC_INT, 1, &dimids[0],
          &varids[1]); ERR;
  err = ncmpi_def_var(ncid, "tot_num_cell_faces", NC_INT, 1, &dimids[0],
          &varids[2]); ERR;
  err = ncmpi_def_var(ncid, "tot_num_face_verts", NC_INT, 1, &dimids[0],
          &varids[3]); ERR;
  err = ncmpi_def_var(ncid, "num_orig_particles", NC_INT, 1, &dimids[0],
          &varids[4]); ERR;

  /* block offsets */
  err = ncmpi_def_var(ncid, "block_off_num_verts", NC_INT64, 1, &dimids[0],
          &varids[5]); ERR;
  err = ncmpi_def_var(ncid, "block_off_num_complete_cells", NC_INT64, 1,
          &dimids[0], &varids[6]); ERR;
  err = ncmpi_def_var(ncid, "block_off_tot_num_cell_faces", NC_INT64, 1,
          &dimids[0], &varids[7]); ERR;
  err = ncmpi_def_var(ncid, "block_off_tot_num_face_verts", NC_INT64, 1,
          &dimids[0], &varids[8]); ERR;
  err = ncmpi_def_var(ncid, "block_off_num_orig_particles", NC_INT64, 1,
          &dimids[0], &varids[9]); ERR;

  dimids_2D[0] = dimids[0];
  dimids_2D[1] = dimids[1];
  err = ncmpi_def_var(ncid, "mins", NC_FLOAT, 2, dimids_2D, &varids[11]); ERR;
  err = ncmpi_def_var(ncid, "maxs", NC_FLOAT, 2, dimids_2D, &varids[12]); ERR;

  dimids_2D[0] = dimids[2];
  dimids_2D[1] = dimids[1];
  err = ncmpi_def_var(ncid, "save_verts", NC_FLOAT, 2, dimids_2D,
          &varids[13]); ERR;
  dimids_2D[0] = dimids[6];
  dimids_2D[1] = dimids[1];
  err = ncmpi_def_var(ncid, "sites", NC_FLOAT, 2, dimids_2D,
          &varids[14]); ERR;
  err = ncmpi_def_var(ncid, "complete_cells", NC_INT, 1, &dimids[3],
          &varids[15]); ERR;
  err = ncmpi_def_var(ncid, "areas", NC_FLOAT, 1, &dimids[3],
          &varids[16]); ERR;
  err = ncmpi_def_var(ncid, "vols", NC_FLOAT, 1, &dimids[3], &varids[17]); ERR;
  err = ncmpi_def_var(ncid, "num_cell_faces", NC_INT, 1, &dimids[3],
          &varids[18]); ERR;
  err = ncmpi_def_var(ncid, "num_face_verts", NC_INT, 1, &dimids[4],
          &varids[19]); ERR;
  err = ncmpi_def_var(ncid, "face_verts", NC_INT, 1, &dimids[5],
          &varids[20]); ERR;
  err = ncmpi_def_var(ncid, "neighbors", NC_INT, 1, &dimids[7],
          &varids[21]); ERR;
  err = ncmpi_def_var(ncid, "g_block_ids", NC_INT, 1, &dimids[0],
          &varids[22]); ERR;

  /* exit define mode */
  err = ncmpi_enddef(ncid); ERR;

  /* write all variables.
     to improve: we can try nonblocking I/O to aggregate small requests */

  for (b = 0; b < nblocks; b++) {

    struct vblock_t *v = &vblocks[b];

    /* quantities */
    start[0] = block_ofsts[NUM_BLOCKS];
    count[0] = 1;
    err = ncmpi_put_vara_int_all(ncid, varids[0], start, count,
         &v->num_verts); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[1], start, count,
         &v->num_complete_cells); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[2], start, count,
         &v->tot_num_cell_faces); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[3], start, count,
         &v->tot_num_face_verts); ERR;
    err = ncmpi_put_vara_int_all(ncid, varids[4], start, count,
         &v->num_orig_particles); ERR;

    /* block offsets */
    err = ncmpi_put_vara_longlong_all(ncid, varids[5], start, count,
              &block_ofsts[NUM_VERTS]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[6], start, count,
              &block_ofsts[NUM_COMP_CELLS]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[7], start, count,
              &block_ofsts[NUM_CELL_FACES]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[8], start, count,
              &block_ofsts[NUM_FACE_VERTS]); ERR;
    err = ncmpi_put_vara_longlong_all(ncid, varids[9], start, count,
              &block_ofsts[NUM_ORIG_PARTS]); ERR;

    /* block bounds */
    start[0] = block_ofsts[NUM_BLOCKS];
    count[0] = 1;
    start[1] = 0;
    count[1] = 3;
    err = ncmpi_put_vara_float_all(ncid, varids[11], start, count,
           v->mins); ERR;
    err = ncmpi_put_vara_float_all(ncid, varids[12], start, count,
           v->maxs); ERR;

    /* save_verts */
    start[0] = block_ofsts[NUM_VERTS];
    start[1] = 0;
    count[0] = v->num_verts;
    count[1] = 3;
    err = ncmpi_put_vara_float_all(ncid, varids[13], start, count,
           v->save_verts); ERR;

    /* sites */
    start[0] = block_ofsts[NUM_ORIG_PARTS];
    start[1] = 0;
    count[0] = v->num_orig_particles;
    count[1] = 3;
    err = ncmpi_put_vara_float_all(ncid, varids[14], start, count,
           v->sites); ERR;

    /* complete cells */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_int_all(ncid, varids[15], start, count,
         v->complete_cells); ERR;

    /* areas */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_float_all(ncid, varids[16], start, count,
           v->areas); ERR;

    /* volumes */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_float_all(ncid, varids[17], start, count,
           v->vols); ERR;

    /* num_cell_faces */
    start[0] = block_ofsts[NUM_COMP_CELLS];
    count[0] = v->num_complete_cells;
    err = ncmpi_put_vara_int_all(ncid, varids[18], start, count,
         v->num_cell_faces); ERR;

    /* num_face_verts */
    start[0] = block_ofsts[NUM_CELL_FACES];
    count[0] = v->tot_num_cell_faces;
    err = ncmpi_put_vara_int_all(ncid, varids[19], start, count,
         v->num_face_verts); ERR;

    /* face verts */
    start[0] = block_ofsts[NUM_FACE_VERTS];
    count[0] = v->tot_num_face_verts;
    err = ncmpi_put_vara_int_all(ncid, varids[20], start, count,
         v->face_verts); ERR;

    /* neighbors */
    int *neighbors = (int*)malloc(DIY_Num_neighbors(0, b) * sizeof(int));
    int num_neighbors = DIY_Get_neighbors(0, b, neighbors);
    start[0] = block_ofsts[NUM_NEIGHBORS];
    count[0] = num_neighbors;
    err = ncmpi_put_vara_int_all(ncid, varids[21], start, count, neighbors);
    ERR;

    /* gids */
    int gid = DIY_Gid(0, b);
    start[0] = block_ofsts[NUM_BLOCKS];
    count[0] = 1;
    err = ncmpi_put_vara_int_all(ncid, varids[22], start, count,
         &gid); ERR;

    /* update block offsets */
    block_ofsts[NUM_VERTS] += v->num_verts;
    block_ofsts[NUM_COMP_CELLS] += v->num_complete_cells;
    block_ofsts[NUM_CELL_FACES] += v->tot_num_cell_faces;
    block_ofsts[NUM_FACE_VERTS] += v->tot_num_face_verts;
    block_ofsts[NUM_ORIG_PARTS] += v->num_orig_particles;
    block_ofsts[NUM_NEIGHBORS] += num_neighbors;
    block_ofsts[NUM_BLOCKS]++;

    /* debug */
/*     fprintf(stderr, "gid = %d num_verts = %d num_complete_cells = %d " */
/* 	    "tot_num_cell_faces = %d tot_num_face_verts = %d " */
/* 	    "num_orig_particles = %d\n", */
/* 	    gid, v->num_verts, v->num_complete_cells, v->tot_num_cell_faces, */
/* 	    v->tot_num_face_verts, v->num_orig_particles); */

  }

  err = ncmpi_close(ncid); ERR;
#endif

}
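/*
  usage sketch (illustrative, not part of the original source): a minimal call
  site for pnetcdf_write; the output file name is an arbitrary example, and the
  ERR macro used above is assumed to be defined elsewhere in the project as a
  pnetcdf error check (e.g. comparing err to NC_NOERR and printing
  ncmpi_strerror(err))
*/
void example_pnetcdf_write(int nblocks, struct vblock_t *vblocks) {

  /* collective call: every rank in the communicator must participate */
  pnetcdf_write(nblocks, vblocks, "voronoi_out.nc", MPI_COMM_WORLD);

}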
Example 4
//----------------------------------------------------------------------------
//
// radix-k merge
//
// did: decomposition id
// its: pointers to input/output items; on return, the first (number of output
//   items) entries hold the merged results
// hdrs: pointers to input headers (optional, pass NULL if unnecessary)
// nr: number of rounds
// kv: k vector, radix for each round
// cc: pointer to communicate object
// assign: pointer to assignment object
// merge_func: pointer to merging function
// create_func: pointer to function that creates item
// destroy_func: pointer to function that destroys item
// type_func: pointer to function that creates MPI datatype for item 
//
// side effects: allocates output items and array of pointers to them, if
//   not reducing in-place
//
// returns: number of output items
//
int Merge::MergeBlocks(int did, char **its, int **hdrs, 
		       int nr, int *kv, Comm *cc, Assignment *assign,
		       void (*merge_func)(char **, int *, int, int *), 
		       char * (*create_func)(int *),
		       void (*destroy_func)(void *),
		       void (*type_func)(void*, MPI_Datatype*, int *)) {

  int rank, groupsize; // MPI usual
  int gid; // global id of current item block
  int p; // process rank
  MPI_Datatype dtype; // data type
  int ng; // number of groups this process owns
  int nb = assign->NumBlks(); // number of blocks this process owns
  vector<char *> my_its(its, its + nb);  // copy of its
  vector<bool> done(nb, false); // done items
  vector<int> root_gids; // distinct gids of root blocks

  // init
  assert(nr > 0 && nr <= DIY_MAX_R); // sanity
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &groupsize);

  // for all rounds
  for (int r = 0; r < nr; r++){

    int n_recv = 0; // number of received blocks by root
    int partners[kv[r]]; // gids of partners in a group
    root_gids.clear();
    root_gids.reserve(kv[r]);

    // all my current blocks must participate in a round (send or receive)
    for (int b = 0; b < nb; b++) {

      if (!done[b]) { // blocks that survived to this round

	gid = DIY_Gid(did, b);
	bool root = GetPartners(kv, r, gid, partners);
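	// by convention the last entry, partners[kv[r] - 1], is the gid of the
	// group's root (receiving) block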

	if (!root) { // nonroots post sends of headers and items
	  p = assign->Gid2Proc(partners[kv[r] - 1]);
	  if (hdrs)
	    type_func(my_its[b], &dtype, hdrs[b]);
	  else
	    type_func(my_its[b], &dtype, NULL);
	  // tag is source block gid
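	  // datatypes built with absolute addresses are sent starting from
	  // MPI_BOTTOM; otherwise the send starts at the item pointer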
	  if (hdrs && dtype_absolute_address)
	    cc->SendItem((char *)MPI_BOTTOM, hdrs[b], p, gid, &dtype);
	  else if (hdrs && !dtype_absolute_address)
	    cc->SendItem((char *)my_its[b], hdrs[b], p, gid, &dtype);
	  else if (!hdrs && dtype_absolute_address)
	    cc->SendItem((char *)MPI_BOTTOM, NULL, p, gid, &dtype);
	  else
	    cc->SendItem((char *)my_its[b], NULL, p, gid, &dtype);
	  MPI_Type_free(&dtype);
	  done[b] = true; // nonroot blocks are done after they have been sent
	}

	else { // root posts receives of headers
	  root_gids.push_back(partners[kv[r] - 1]);
	  for (int k = 0; k < kv[r] - 1; k++) { // receive the others
	    p = assign->Gid2Proc(partners[k]);
	    cc->StartRecvItem(p, hdrs);
	    n_recv++;
	  }
	}

      } // blocks that survived to this round

    } // all my current blocks

    // finish receiving all items
    char *recv_its[n_recv]; // received items
    int recv_gids[n_recv]; // (source) gids of the received items
    int recv_procs[n_recv]; // source proc of each received item
    cc->FinishRecvItemsMerge(recv_its, recv_gids, recv_procs, create_func, 
			     type_func); 

    // merge each group
    ng = (int)root_gids.size(); // number of groups this process owns
    for (int j = 0; j < ng; j++) {

      vector<char *>reduce_its; // items ready for reduction in a group
      vector<int>reduce_gids; // gids for reduce_its
      reduce_its.reserve(kv[r]);
      reduce_gids.reserve(kv[r]);

      int lid = assign->Gid2Lid(root_gids[j]);
      reduce_its.push_back(my_its[lid]);
      reduce_gids.push_back(root_gids[j]);

      GetPartners(kv, r, root_gids[j], partners);

      for (int i = 0; i < n_recv; i++) { // collect items for this group
	if (find(partners, partners + kv[r], recv_gids[i]) != 
	    partners + kv[r]) {
	  reduce_its.push_back(recv_its[i]);
	  reduce_gids.push_back(recv_gids[i]);
	}
      }

      // header from root block of merge is used
      if (hdrs)
	merge_func(&reduce_its[0], &reduce_gids[0], kv[r], hdrs[lid]);
      else
	merge_func(&reduce_its[0], &reduce_gids[0], kv[r], NULL);
      my_its[lid] = reduce_its[0];

    }

    // cleanup
    if (ng) {
      for (int i = 0; i < n_recv; i++)
	destroy_func(recv_its[i]);
    }

  } // for all rounds

  // move results to the front, swapping them rather than copying so that user
  // can free all items without having duplicated pointers that get freed
  // multiple times
  for (int i = 0; i < ng; i++) {
    char *temp = its[i];
    its[i] = my_its[assign->Gid2Lid(root_gids[i])];
    its[assign->Gid2Lid(root_gids[i])] = temp;
  }

  return ng;

}
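//----------------------------------------------------------------------------
//
// usage sketch (illustrative, not part of the original source): one possible
// set of user callbacks for MergeBlocks when an item is a fixed-size array of
// floats; EX_ITEM_SIZE, the element-wise sum, and all names below are
// assumptions made for illustration only
//
static const int EX_ITEM_SIZE = 1024; // floats per item (assumed)

// merges num items element-wise into the first item (the root block's item)
static void ExampleMergeFunc(char **items, int *gids, int num, int *hdr) {
  float *out = (float *)items[0];
  for (int i = 1; i < num; i++) {
    float *in = (float *)items[i];
    for (int j = 0; j < EX_ITEM_SIZE; j++)
      out[j] += in[j];
  }
  (void)gids; (void)hdr; // unused in this sketch
}

// allocates an empty item to receive into; the header is unused here because
// the item size is fixed
static char *ExampleCreateFunc(int *hdr) {
  (void)hdr;
  return new char[EX_ITEM_SIZE * sizeof(float)];
}

// frees an item allocated by ExampleCreateFunc
static void ExampleDestroyFunc(void *item) {
  delete[] (char *)item;
}

// describes one item as EX_ITEM_SIZE contiguous floats (relative addressing,
// so the non-MPI_BOTTOM send path above is taken)
static void ExampleTypeFunc(void *item, MPI_Datatype *dtype, int *hdr) {
  (void)item; (void)hdr;
  MPI_Type_contiguous(EX_ITEM_SIZE, MPI_FLOAT, dtype);
  MPI_Type_commit(dtype); // MergeBlocks frees the type after sending
}

// e.g., two rounds with k = 4 merge 16 blocks down to 1; merge, items, cc,
// assign, and did are assumed to exist in the calling code:
//
//   int kv[2] = { 4, 4 };
//   int nout = merge->MergeBlocks(did, items, NULL, 2, kv, cc, assign,
//                                 &ExampleMergeFunc, &ExampleCreateFunc,
//                                 &ExampleDestroyFunc, &ExampleTypeFunc);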