コード例 #1
0
ファイル: xmpf_lib.c プロジェクト: clementval/omni-compiler
void xmp_gather_(_XMP_array_t **x_d, _XMP_array_t **a_d, ... )
{
  int          i;
  va_list      valst;
  _XMP_array_t *idx_p;
  _XMP_array_t **idx_pp;
  _XMP_array_t **idx_array;
//  _XMP_array_t *x_p = *(_XMP_array_t **)x_d;
  _XMP_array_t *a_p = *(_XMP_array_t **)a_d;

  idx_array = (_XMP_array_t **)_XMP_alloc(sizeof(_XMP_array_t *)*a_p->dim);

  va_start( valst, a_d );

  for(i=0;i<a_p->dim;i++){
     idx_pp = va_arg( valst , _XMP_array_t** );
     idx_p  = *(_XMP_array_t **)idx_pp;
     idx_array[i] = idx_p;
  }

  va_end(valst);

   xmpf_gather(*x_d, *a_d, idx_array);

   _XMP_free(idx_array);
}
コード例 #2
0
void _XMP_reduce_gpu_CLAUSE(void *dev_addr, int count, int datatype, int op) {
  // setup information
  MPI_Datatype mpi_datatype = MPI_DATATYPE_NULL;
  size_t datatype_size = 0;
  MPI_Op mpi_op;
  _XMP_setup_reduce_type(&mpi_datatype, &datatype_size, datatype);
  _XMP_setup_reduce_op(&mpi_op, op);

  size_t size = datatype_size * count;
  void *host_buf = _XMP_alloc(size);
  cudaError_t e;

  // copy dev to host
  e = cudaMemcpy(host_buf, dev_addr, size, cudaMemcpyDeviceToHost);
  cudaErrorCheck(e);

  // reduce
  MPI_Allreduce(MPI_IN_PLACE, host_buf, count, mpi_datatype, mpi_op, *((MPI_Comm *)(_XMP_get_execution_nodes())->comm));

  // copy host to dev
  e = cudaMemcpy(dev_addr, host_buf, size, cudaMemcpyHostToDevice);
  cudaErrorCheck(e);

  _XMP_free(host_buf);
}
コード例 #3
0
static void _mpi_scalar_mput(const int target_rank, 
			     const _XMP_coarray_t *dst_desc, const void *src,
			     const size_t dst_offset, const size_t src_offset,
			     const int dst_dims, 
			     const _XMP_array_section_t *dst_info,
			     const bool is_dst_on_acc)
{
  int allelmt_dim = _XMP_get_dim_of_allelmts(dst_dims, dst_info);
  size_t element_size = dst_desc->elmt_size;
  size_t allelmt_size = (allelmt_dim == dst_dims)? element_size : dst_info[allelmt_dim].distance * dst_info[allelmt_dim].elmts;
  char *laddr = (allelmt_dim == dst_dims)? ((char*)src + src_offset) : _XMP_alloc(allelmt_size);
  char *raddr = get_remote_addr(dst_desc, target_rank, is_dst_on_acc) + dst_offset;
  MPI_Win win = get_window(dst_desc, is_dst_on_acc);

  XACC_DEBUG("scalar_mput(src_p=%p, size=%zd, target=%d, dst_p=%p, is_acc=%d)", laddr, element_size, target_rank, raddr, is_dst_on_acc);

  XACC_DEBUG("allelmt_dim=%d, dst_dims=%d", allelmt_dim, dst_dims);
  if(allelmt_dim != dst_dims){
    //mcopy
    _XMP_array_section_t info;
    info.start = 0;
    info.length = allelmt_size/element_size;
    info.stride = 1;
    info.elmts = info.length;
    info.distance = element_size;
    _XMP_stride_memcpy_1dim(laddr, (char*)src+src_offset, &info, element_size, _XMP_SCALAR_MCOPY);
    XACC_DEBUG("mcopy(%lld, %lld, %lld), %lld",info.start, info.length, info.stride, info.elmts);
  }
  long long idxs[allelmt_dim+1];
  for(int i = 0; i < allelmt_dim+1; i++) idxs[i]=0;

  while(1){
    size_t offset = 0;
    for(int i = 0; i < allelmt_dim; i++){
      offset += dst_info[i].distance * idxs[i+1] * dst_info[i].stride;
    }

    MPI_Put((void*)laddr, allelmt_size, MPI_BYTE, target_rank,
	    (MPI_Aint)(raddr+offset), allelmt_size, MPI_BYTE,
	    win);

    ++idxs[allelmt_dim];
    for(int i = allelmt_dim-1; i >= 0; i--){
      long long length = dst_info[i].length;
      if(idxs[i+1] >= length){
	idxs[i+1] -= length;
	++idxs[i];
      }else{
	break;
      }
    }
    if(idxs[0] > 0){
      break;
    }
  }
  _wait_puts(target_rank, win);
  if(allelmt_dim != dst_dims){
    _XMP_free(laddr);
  }
}
コード例 #4
0
void _XMP_reduce_gpu_NODES_ENTIRE(_XMP_nodes_t *nodes, void *dev_addr, int count, int datatype, int op)
{
  if (count == 0) {
    return; // FIXME not good implementation
  }
  if (!nodes->is_member) {
    return;
  }

  // setup information
  MPI_Datatype mpi_datatype = MPI_DATATYPE_NULL;
  size_t datatype_size = 0;
  MPI_Op mpi_op;
  _XMP_setup_reduce_type(&mpi_datatype, &datatype_size, datatype);
  _XMP_setup_reduce_op(&mpi_op, op);

  size_t size = datatype_size * count;
  void *host_buf = _XMP_alloc(size);
  cudaError_t e;

  // copy dev to host
  e = cudaMemcpy(host_buf, dev_addr, size, cudaMemcpyDeviceToHost);
  cudaErrorCheck(e);

  MPI_Allreduce(MPI_IN_PLACE, host_buf, count, mpi_datatype, mpi_op, *((MPI_Comm *)nodes->comm));

  // copy host to dev
  e = cudaMemcpy(dev_addr, host_buf, size, cudaMemcpyHostToDevice);
  cudaErrorCheck(e);

  _XMP_free(host_buf);
}
コード例 #5
0
/**
   Destroy shift queue
 */
void _XMP_mpi_destroy_shift_queue(bool is_acc)
{
  struct _shift_queue_t *shift_queue = is_acc? &_shift_queue_acc : &_shift_queue;

  _XMP_free(shift_queue->shifts);
  shift_queue->shifts = NULL;
}
コード例 #6
0
void _XMP_tca_comm_finalize()
{
  for(int i = 0; i < _XMP_world_size; i++){
    if(i == _XMP_world_rank) continue;
    _XMP_tca_ring_buf_finalize(&_ring_bufs[i]);
  }
  _XMP_free(_ring_bufs);
  //fprintf(stderr, "spin_wait_count=%llu\n", spin_wait_count);
}
コード例 #7
0
void _XMP_mpi_coarray_detach(_XMP_coarray_t *coarray_desc, const bool is_acc)
{
  if(_XMP_flag_multi_win){
    MPI_Win win = is_acc? coarray_desc->win_acc : coarray_desc->win;
    MPI_Win_unlock_all(win);
    _XMP_barrier_EXEC();
    _XMP_mpi_onesided_destroy_win(&win);
  }else{
    MPI_Win win = _xmp_mpi_distarray_win;
    void *real_addr = coarray_desc->real_addr;
#ifdef _XMP_XACC
    if(is_acc){
      win = _xmp_mpi_distarray_win_acc;
      real_addr = coarray_desc->real_addr_dev;
    }
#endif

    MPI_Win_detach(win, real_addr);
  }

  if(is_acc){
#ifdef _XMP_XACC
    _XMP_free(coarray_desc->addr_dev); //FIXME may be wrong
    coarray_desc->addr_dev = NULL;
    coarray_desc->real_addr_dev = NULL;
    coarray_desc->win_acc = MPI_WIN_NULL;
    coarray_desc->nodes = NULL;
#endif
  }else{
    _XMP_free(coarray_desc->addr);
    coarray_desc->addr = NULL;
    coarray_desc->real_addr = NULL;
    coarray_desc->win = MPI_WIN_NULL;
    coarray_desc->nodes = NULL;
  }
}
コード例 #8
0
void _XMP_reflect_async__(_XMP_array_t *a, int async_id)
{

  if (!a->is_allocated){
    _xmp_set_reflect_flag = 0;
    return;
  }

  if (!_xmp_set_reflect_flag){
    for (int i = 0; i < a->dim; i++){
      _XMP_array_info_t *ai = &(a->info[i]);
      _xmp_lwidth[i] = ai->shadow_size_lo;
      _xmp_uwidth[i] = ai->shadow_size_hi;
      _xmp_is_periodic[i] = 0;
    }
  }
    
  _XMP_reflect_sched(a, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic, 1);
  _XMP_reflect_start(a, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic, async_id);

  _XMP_async_comm_t *async = _XMP_get_current_async();
  _XMP_free(async->reqs); async->reqs = NULL; // reqs not needed in RDMA reflects.

  for (int i = 0; i < a->dim; i++){
    _XMP_reflect_sched_t *reflect = a->info[i].reflect_sched;
    if (_xmp_lwidth[i] && reflect->hi_rank != -1) async->nreqs += reflect->count;
    if (_xmp_uwidth[i] && reflect->lo_rank != -1) async->nreqs += reflect->count;
  }

  _xmp_set_reflect_flag = 0;
  for (int i = 0; i < a->dim; i++){
    _xmp_lwidth[i] = 0;
    _xmp_uwidth[i] = 0;
    _xmp_is_periodic[i] = 0;
  }

}
コード例 #9
0
ファイル: xmpf_lib.c プロジェクト: clementval/omni-compiler
void xmp_transpose_(_XMP_array_t **dst_d, _XMP_array_t **src_d, int *opt){

#if 1
   xmpf_transpose(*dst_d, *src_d, *opt);
   return;
#else
  _XMP_array_t *dst_array = *(_XMP_array_t **)dst_d;
  _XMP_array_t *src_array = *(_XMP_array_t **)src_d;

  int nnodes;

  int dst_block_dim, src_block_dim;

  void *sendbuf=NULL, *recvbuf=NULL;
  unsigned long long count, bufsize;

  int dst_chunk_size, dst_ser_size, type_size;
  int src_chunk_size, src_ser_size;

  nnodes = dst_array->align_template->onto_nodes->comm_size;

  // 2-dimensional Matrix
  if (dst_array->dim != 2) {
    _XMP_fatal("bad dimension for xmp_transpose");
  }

  // No Shadow
  if (dst_array->info[0].shadow_size_lo != 0 ||
      dst_array->info[0].shadow_size_hi != 0 ||
      src_array->info[0].shadow_size_lo != 0 ||
      src_array->info[0].shadow_size_hi != 0) {
   _XMP_fatal("A global array must not have shadows");
  fflush(stdout);
  }

  // Dividable by the number of nodes
  if (dst_array->info[0].ser_size % nnodes != 0) {
   _XMP_fatal("Not dividable by the number of nodes");
  fflush(stdout);
  }

  dst_block_dim = (dst_array->info[0].align_manner == _XMP_N_ALIGN_BLOCK) ? 0 : 1;
  src_block_dim = (src_array->info[0].align_manner == _XMP_N_ALIGN_BLOCK) ? 0 : 1;

  dst_chunk_size = dst_array->info[dst_block_dim].par_size;
  dst_ser_size = dst_array->info[dst_block_dim].ser_size;
  src_chunk_size = src_array->info[src_block_dim].par_size;
  src_ser_size = src_array->info[src_block_dim].ser_size;
  type_size = dst_array->type_size;

  count =  dst_chunk_size * src_chunk_size;
  bufsize = count * nnodes * type_size;

  _XMP_check_reflect_type();

  if (src_block_dim == 1){
    if (*opt ==0){
      sendbuf = _XMP_alloc(bufsize);
    }else if (*opt==1){
      sendbuf = dst_array->array_addr_p;
    }
    // src_array -> sendbuf
    _XMP_pack_vector2((char *)sendbuf, (char *)src_array->array_addr_p ,
		      src_chunk_size, dst_chunk_size, nnodes, type_size,
		      src_block_dim);
  }
  else {
    sendbuf = src_array->array_addr_p;
  }

  if (*opt == 0){
    recvbuf = _XMP_alloc(bufsize);
  }else if (*opt ==1){
    recvbuf = src_array->array_addr_p;
  }
  MPI_Alltoall(sendbuf, count * type_size, MPI_BYTE, recvbuf, count * type_size,
               MPI_BYTE, *((MPI_Comm *)src_array->align_template->onto_nodes->comm));

  if (dst_block_dim == 1){
    _XMPF_unpack_transpose_vector((char *)dst_array->array_addr_p ,
       (char *)recvbuf , src_ser_size, dst_chunk_size, type_size, dst_block_dim);

    if (*opt==0){
      _XMP_free(recvbuf);
    }
  }

  if (src_block_dim == 1){
    if (*opt == 0){
      _XMP_free(sendbuf);
    }
  }


  return;
#endif
}
コード例 #10
0
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim,
				  int lwidth, int uwidth, int is_periodic, int shadow_comm_type){

  if (lwidth == 0 && uwidth == 0) return;

  _XMP_array_info_t *ai = &(adesc->info[target_dim]);
  _XMP_array_info_t *ainfo = adesc->info;
  _XMP_ASSERT(ai->align_manner == _XMP_N_ALIGN_BLOCK);
  _XMP_ASSERT(ai->is_shadow_comm_member);

  if (lwidth > ai->shadow_size_lo || uwidth > ai->shadow_size_hi){
    _XMP_fatal("reflect width is larger than shadow width.");
  }

  _XMP_reflect_sched_t *reflect = ai->reflect_sched;

  int target_tdim = ai->align_template_index;
  _XMP_nodes_info_t *ni = adesc->align_template->chunk[target_tdim].onto_nodes_info;

  if (ni->size == 1 && !is_periodic) return;

  int ndims = adesc->dim;

  // 0-origin
  int my_pos = ni->rank;
  int lb_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_lower);
  int ub_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_upper);

  int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1;
  int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1;

  MPI_Comm *comm = adesc->align_template->onto_nodes->comm;
  int my_rank = adesc->align_template->onto_nodes->comm_rank;

  int lo_rank = my_rank + (lo_pos - my_pos) * ni->multiplier;
  int hi_rank = my_rank + (hi_pos - my_pos) * ni->multiplier;

  int count = 0, blocklength = 0;
  long long stride = 0;

  int type_size = adesc->type_size;
  void *array_addr = adesc->array_addr_p;

  void *lo_send_array = NULL, *lo_recv_array = NULL;
  void *hi_send_array = NULL, *hi_recv_array = NULL;

  void *lo_send_buf = NULL;
  void *lo_recv_buf = NULL;
  void *hi_send_buf = NULL;
  void *hi_recv_buf = NULL;

  int lo_buf_size = 0;
  int hi_buf_size = 0;

  if (reflect->prev_pcopy_sched_type &&
      lwidth == reflect->lo_width &&
      uwidth == reflect->hi_width &&
      is_periodic == reflect->is_periodic){
    if ((adesc->order == MPI_ORDER_FORTRAN && target_dim != ndims - 1) ||
	(adesc->order == MPI_ORDER_C && target_dim != 0)){
      goto init_comm;
    }
    else if (reflect->prev_pcopy_sched_type != shadow_comm_type){
      count = reflect->count;
      blocklength = reflect->blocklength;
      stride = reflect->stride;
      goto alloc_buf;
    }
  }
  
  //
  // setup data_type
  //

  if (adesc->order == MPI_ORDER_FORTRAN){ /* for XMP/F */

    count = 1;
    blocklength = type_size;
    stride = ainfo[0].alloc_size * type_size;

    for (int i = ndims - 2; i >= target_dim; i--){
      count *= ainfo[i+1].alloc_size;
    }

    for (int i = 1; i <= target_dim; i++){
      blocklength *= ainfo[i-1].alloc_size;
      stride *= ainfo[i].alloc_size;
    }

  }
  else if (adesc->order == MPI_ORDER_C){ /* for XMP/C */

    count = 1;
    blocklength = type_size;
    stride = ainfo[ndims-1].alloc_size * type_size;

    for (int i = 1; i <= target_dim; i++){
      count *= ainfo[i-1].alloc_size;
    }

    for (int i = ndims - 2; i >= target_dim; i--){
      blocklength *= ainfo[i+1].alloc_size;
      stride *= ainfo[i].alloc_size;
    }

  }
  else {
    _XMP_fatal("cannot determin the base language.");
  }

  //
  // calculate base address
  //

 alloc_buf:
  
  // for lower reflect

  if (lwidth){

    lo_send_array = array_addr;
    lo_recv_array = array_addr;

    for (int i = 0; i < ndims; i++) {

      int lb_send, lb_recv;
      unsigned long long dim_acc;

      if (i == target_dim) {
	lb_send = ainfo[i].local_upper - lwidth + 1;
	lb_recv = ainfo[i].shadow_size_lo - lwidth;;
      }
      else {
	// Note: including shadow area
	lb_send = 0;
	lb_recv = 0;
      }

      dim_acc = ainfo[i].dim_acc;

      lo_send_array = (void *)((char *)lo_send_array + lb_send * dim_acc * type_size);
      lo_recv_array = (void *)((char *)lo_recv_array + lb_recv * dim_acc * type_size);

    }

  }

  // for upper reflect

  if (uwidth){

    hi_send_array = array_addr;
    hi_recv_array = array_addr;

    for (int i = 0; i < ndims; i++) {

      int lb_send, lb_recv;
      unsigned long long dim_acc;

      if (i == target_dim) {
	lb_send = ainfo[i].local_lower;
	lb_recv = ainfo[i].local_upper + 1;
      }
      else {
	// Note: including shadow area
	lb_send = 0;
	lb_recv = 0;
      }

      dim_acc = ainfo[i].dim_acc;

      hi_send_array = (void *)((char *)hi_send_array + lb_send * dim_acc * type_size);
      hi_recv_array = (void *)((char *)hi_recv_array + lb_recv * dim_acc * type_size);

    }

  }

  //
  // Allocate buffers
  //

  if (reflect->prev_pcopy_sched_type == _XMP_COMM_REFLECT &&
      ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
       (adesc->order == MPI_ORDER_C && target_dim == 0))){
    ;
  }
  else {
    _XMP_free(reflect->lo_send_buf);
    _XMP_free(reflect->lo_recv_buf);
    _XMP_free(reflect->hi_send_buf);
    _XMP_free(reflect->hi_recv_buf);
  }

  // for lower reflect

  if (lwidth){

    lo_buf_size = lwidth * blocklength * count;

    if (shadow_comm_type == _XMP_COMM_REFLECT &&
	((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
	 (adesc->order == MPI_ORDER_C && target_dim == 0))){
      lo_send_buf = lo_send_array;
      lo_recv_buf = lo_recv_array;
    }
    else {
      _XMP_TSTART(t0);
      lo_send_buf = _XMP_alloc(lo_buf_size);
      lo_recv_buf = _XMP_alloc(lo_buf_size);
      _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
    }

  }

  // for upper reflect

  if (uwidth){

    hi_buf_size = uwidth * blocklength * count;

    if (shadow_comm_type == _XMP_COMM_REFLECT &&
	((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
	 (adesc->order == MPI_ORDER_C && target_dim == 0))){
      hi_send_buf = hi_send_array;
      hi_recv_buf = hi_recv_array;
    }
    else {
      _XMP_TSTART(t0);
      hi_send_buf = _XMP_alloc(hi_buf_size);
      hi_recv_buf = _XMP_alloc(hi_buf_size);
      _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
    }

  }

  //
  // cache schedule
  //

  reflect->count = count;
  reflect->blocklength = blocklength;
  reflect->stride = stride;

  reflect->lo_send_array = lo_send_array;
  reflect->lo_recv_array = lo_recv_array;
  reflect->hi_send_array = hi_send_array;
  reflect->hi_recv_array = hi_recv_array;

  reflect->lo_send_buf = lo_send_buf;
  reflect->lo_recv_buf = lo_recv_buf;
  reflect->hi_send_buf = hi_send_buf;
  reflect->hi_recv_buf = hi_recv_buf;

  //
  // initialize communication
  //

  int src, dst;

 init_comm:
  
  if (!is_periodic && my_pos == lb_pos){ // no periodic
    lo_rank = MPI_PROC_NULL;
  }

  if (!is_periodic && my_pos == ub_pos){ // no periodic
    hi_rank = MPI_PROC_NULL;
  }

  lo_buf_size = lwidth * reflect->blocklength * reflect->count;
  hi_buf_size = uwidth * reflect->blocklength * reflect->count;

  // for lower shadow

  if (lwidth){
    src = lo_rank;
    dst = hi_rank;
  }
  else {
    src = MPI_PROC_NULL;
    dst = MPI_PROC_NULL;
  }

  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
    if (reflect->req_reduce[0] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[0]);
    }
	
    if (reflect->req_reduce[1] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[1]);
    }

    MPI_Send_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[0]);
    MPI_Recv_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[1]);
  }
  else {
    if (reflect->req[0] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[0]);
    }
	
    if (reflect->req[1] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[1]);
    }

    MPI_Recv_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[0]);
    MPI_Send_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[1]);
  }
  
  // for upper shadow

  if (uwidth){
    src = hi_rank;
    dst = lo_rank;
  }
  else {
    src = MPI_PROC_NULL;
    dst = MPI_PROC_NULL;
  }

  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
    if (reflect->req_reduce[2] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[2]);
    }
	
    if (reflect->req_reduce[3] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[3]);
    }

    MPI_Send_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[2]);
    MPI_Recv_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[3]);
  }
  else {
    if (reflect->req[2] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[2]);
    }
	
    if (reflect->req[3] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[3]);
    }

    MPI_Recv_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[2]);
    MPI_Send_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[3]);
  }
  
  reflect->prev_pcopy_sched_type = shadow_comm_type;
  
  reflect->lo_rank = lo_rank;
  reflect->hi_rank = hi_rank;

}