Esempio n. 1
0
/*
 * Start an asynchronous reflect of array `a`, one dimension at a time.
 *
 * For every dimension whose shadow type is _XMP_N_SHADOW_NORMAL and whose
 * requested lower or upper width (_xmp_lwidth / _xmp_uwidth) is nonzero:
 *   - the per-dimension schedule is rebuilt when the requested widths or
 *     periodicity differ from the values cached in the schedule,
 *   - the schedule's four requests are appended to the request list of the
 *     current async communication object, and
 *   - the requests are started with MPI_Startall (unless req[0] is null).
 * Dimensions with a FULL shadow are delegated to _XMP_reflect_shadow_FULL.
 *
 * a        : array whose shadow is reflected.
 * async_id : not referenced in this function.
 *
 * Calls _XMP_fatal when the async request list would exceed
 * _XMP_MAX_ASYNC_REQS.
 *
 * NOTE(review): t0 is reused by the inner communication timing region, so
 * the outer t_sched measurement may not cover the whole loop -- confirm
 * against the definition of _XMP_TSTART/_XMP_TEND.
 */
void _XMP_reflect_async_cardinal(_XMP_array_t *a, int async_id)
{
  _XMP_async_comm_t *async = _XMP_get_current_async();
  MPI_Request *dst = &async->reqs[async->nreqs];
  int nadded = 0;

  _XMP_TSTART(t0);
  for (int d = 0; d < a->dim; d++){

    _XMP_array_info_t *info = &(a->info[d]);

    if (info->shadow_type == _XMP_N_SHADOW_NONE){
      continue;
    }

    if (info->shadow_type != _XMP_N_SHADOW_NORMAL){
      /* _XMP_N_SHADOW_FULL */
      _XMP_reflect_shadow_FULL(a->array_addr_p, a, d);
      continue;
    }

    _XMP_reflect_sched_t *sched = info->reflect_sched;

    int lw = _xmp_lwidth[d];
    int uw = _xmp_uwidth[d];
    int periodic = _xmp_is_periodic[d];

    /* Nothing requested for this dimension. */
    if (!lw && !uw){
      continue;
    }

    _XMP_ASSERT(sched);

    /* Rebuild the schedule when it was never set up (is_periodic == -1)
       or when the cached parameters no longer match the request. */
    int stale = (sched->is_periodic == -1 ||
                 lw != sched->lo_width ||
                 uw != sched->hi_width ||
                 periodic != sched->is_periodic);
    if (stale){
      sched->lo_width = lw;
      sched->hi_width = uw;
      sched->is_periodic = periodic;

      _XMP_reflect_normal_sched_dim(a, d, lw, uw, periodic);
    }

    /* Each dimension contributes exactly four requests. */
    if (async->nreqs + nadded + 4 > _XMP_MAX_ASYNC_REQS){
      _XMP_fatal("too many arrays in an asynchronous reflect");
    }
    memcpy(&dst[nadded], sched->req, 4 * sizeof(MPI_Request));
    nadded += 4;

    _XMP_TSTART(t0);
    /* If req[0] isn't null, the other three shouldn't be null either. */
    if (sched->req[0] != MPI_REQUEST_NULL){
      MPI_Startall(4, sched->req);
    }
    _XMP_TEND2(xmptiming_.t_comm, xmptiming_.tdim_comm[d], t0);

  }
  _XMP_TEND(xmptiming_.t_sched, t0);

  async->nreqs += nadded;
}
Esempio n. 2
0
/*
 * Wait for completion of the RDMA transfers of a reflect on array `a`.
 *
 * The number of expected completion notices is computed per NIC from the
 * per-dimension schedules: `count` notices on NIC0 for every dimension with
 * a nonzero lower width and a valid hi_rank, and `count` notices on NIC1 for
 * every dimension with a nonzero upper width and a valid lo_rank.  Both
 * completion queues are then polled until the expected counts reach zero,
 * after which xmp_barrier() is called.
 *
 * a           : array being reflected.
 * lwidth      : per-dimension lower widths.
 * uwidth      : per-dimension upper widths.
 * is_periodic : not referenced in this function.
 */
static void _XMP_reflect_wait(_XMP_array_t *a, int *lwidth, int *uwidth, int *is_periodic)
{
  int pending_nic0 = 0;
  int pending_nic1 = 0;

  _XMP_TSTART(t0);

  /* Count the notices expected on each NIC. */
  for (int d = 0; d < a->dim; d++){
    _XMP_reflect_sched_t *sched = a->info[d].reflect_sched;

    if (lwidth[d] && sched->hi_rank != -1){
      pending_nic0 += sched->count;
    }
    if (uwidth[d] && sched->lo_rank != -1){
      pending_nic1 += sched->count;
    }
  }

  /* Drain both completion queues until every expected notice has arrived. */
  while (pending_nic0 != 0 || pending_nic1 != 0){
    while (FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC0, NULL) == FJMPI_RDMA_NOTICE){
      pending_nic0--;
    }
    while (FJMPI_Rdma_poll_cq(FJMPI_RDMA_NIC1, NULL) == FJMPI_RDMA_NOTICE){
      pending_nic1--;
    }
  }

  xmp_barrier();

  _XMP_TEND(xmptiming_.t_wait, t0);
}
Esempio n. 3
0
/*
 * Wait for the MPI requests of a previously started reflect on array `a`.
 *
 * For every dimension with a nonzero lower or upper width and a NORMAL
 * shadow, MPI_Waitall is called on the four requests of that dimension's
 * schedule.  Dimensions with any other shadow type are skipped.  When
 * _xmp_reflect_pack_flag is set, _XMP_reflect_unpack is called afterwards.
 *
 * a           : array being reflected.
 * lwidth      : per-dimension lower widths.
 * uwidth      : per-dimension upper widths.
 * is_periodic : per-dimension periodic flags, forwarded to the unpack step.
 */
static void _XMP_reflect_wait(_XMP_array_t *a, int *lwidth, int *uwidth, int *is_periodic)
{
  for (int d = 0; d < a->dim; d++){

    /* No reflect was requested for this dimension. */
    if (lwidth[d] == 0 && uwidth[d] == 0){
      continue;
    }

    /* Only NORMAL shadows have requests to wait on; nothing is done for
       FULL shadows here. */
    if (a->info[d].shadow_type != _XMP_N_SHADOW_NORMAL){
      continue;
    }

    _XMP_reflect_sched_t *sched = a->info[d].reflect_sched;

    _XMP_TSTART(t0);
    MPI_Waitall(4, sched->req, MPI_STATUSES_IGNORE);
    _XMP_TEND2(xmptiming_.t_wait, xmptiming_.tdim_wait[d], t0);

  }

  if (_xmp_reflect_pack_flag){
    _XMP_TSTART(t0);
    _XMP_reflect_unpack(a, lwidth, uwidth, is_periodic);
    _XMP_TEND(xmptiming_.t_copy, t0);
  }
}
Esempio n. 4
0
/*
 * Issue the RDMA puts of a reflect on array `a`.
 *
 * After an initial xmp_barrier(), for every dimension:
 *   - if the lower width is nonzero and hi_rank is valid, `count` puts are
 *     issued to hi_rank, from lo_send_array to lo_recv_array, each of
 *     lwidth * blocklength and advanced by `stride` per put;
 *   - if the upper width is nonzero and lo_rank is valid, `count` puts are
 *     issued to lo_rank, from hi_send_array to hi_recv_array, each of
 *     uwidth * blocklength and advanced by `stride` per put.
 * Lower puts use local NIC0 / remote NIC2, upper puts local NIC1 / remote
 * NIC3.  Completion is not awaited here.
 *
 * a           : array being reflected.
 * lwidth      : per-dimension lower widths.
 * uwidth      : per-dimension upper widths.
 * is_periodic : not referenced in this function.
 * tag         : tag passed to every FJMPI_Rdma_put call.
 */
static void _XMP_reflect_start(_XMP_array_t *a, int *lwidth, int *uwidth, int *is_periodic,
                               int tag)
{
  _XMP_TSTART(t1);

  xmp_barrier();

  for (int d = 0; d < a->dim; d++){

    _XMP_reflect_sched_t *sched = a->info[d].reflect_sched;

    _XMP_TSTART(t0);

    /* Lower reflect: data goes to the upper neighbor. */
    if (lwidth[d] && sched->hi_rank != -1){
      int k = 0;
      while (k < sched->count){
        FJMPI_Rdma_put(sched->hi_rank, tag,
                       (uint64_t)sched->lo_recv_array + k * sched->stride,
                       (uint64_t)sched->lo_send_array + k * sched->stride,
                       lwidth[d] * sched->blocklength,
                       FJMPI_RDMA_LOCAL_NIC0 | FJMPI_RDMA_REMOTE_NIC2);
        k++;
      }
    }

    /* Upper reflect: data goes to the lower neighbor. */
    if (uwidth[d] && sched->lo_rank != -1){
      int k = 0;
      while (k < sched->count){
        FJMPI_Rdma_put(sched->lo_rank, tag,
                       (uint64_t)sched->hi_recv_array + k * sched->stride,
                       (uint64_t)sched->hi_send_array + k * sched->stride,
                       uwidth[d] * sched->blocklength,
                       FJMPI_RDMA_LOCAL_NIC1 | FJMPI_RDMA_REMOTE_NIC3);
        k++;
      }
    }

    _XMP_TEND(xmptiming_.tdim_comm[d], t0);

  }

  _XMP_TEND(xmptiming_.t_comm, t1);
}
Esempio n. 5
0
/*
 * Reflect (update the shadow of) array `a` using the RDMA start/wait path.
 *
 * Steps:
 *   1. If the array is not allocated, clear _xmp_set_reflect_flag and return.
 *   2. If no widths were set beforehand (_xmp_set_reflect_flag == 0), use
 *      the declared shadow sizes of each dimension and non-periodic mode.
 *   3. For every NORMAL-shadow dimension with a nonzero width, rebuild the
 *      RDMA schedule when the cached widths/periodicity differ from the
 *      requested ones (or the schedule was never set, is_periodic == -1).
 *      Dimensions with NONE or FULL shadows need no scheduling here.
 *   4. Start the transfers (_XMP_reflect_start, tag 0) and wait for them
 *      (_XMP_reflect_wait).
 *   5. Reset _xmp_set_reflect_flag and the global width/periodic arrays for
 *      the dimensions of `a`.
 *
 * a : array whose shadow is reflected.
 */
void _XMP_reflect__(_XMP_array_t *a)
{
  //_XMP_RETURN_IF_SINGLE;
  if (!a->is_allocated){
    _xmp_set_reflect_flag = 0;
    return;
  }

  /* Default request: full declared shadow width, non-periodic. */
  if (!_xmp_set_reflect_flag){
    for (int i = 0; i < a->dim; i++){
      _XMP_array_info_t *ai = &(a->info[i]);
      _xmp_lwidth[i] = ai->shadow_size_lo;
      _xmp_uwidth[i] = ai->shadow_size_hi;
      _xmp_is_periodic[i] = 0;
    }
  }

  _XMP_TSTART(t0);
  for (int i = 0; i < a->dim; i++){

    _XMP_array_info_t *ai = &(a->info[i]);

    /* Only NORMAL shadows are scheduled; NONE and FULL are skipped. */
    if (ai->shadow_type != _XMP_N_SHADOW_NORMAL){
      continue;
    }

    /* Nothing requested for this dimension. */
    if (!_xmp_lwidth[i] && !_xmp_uwidth[i]){
      continue;
    }

    _XMP_reflect_sched_t *reflect = ai->reflect_sched;

    _XMP_ASSERT(reflect);

    if (reflect->is_periodic == -1 /* not set yet */ ||
        _xmp_lwidth[i] != reflect->lo_width ||
        _xmp_uwidth[i] != reflect->hi_width ||
        _xmp_is_periodic[i] != reflect->is_periodic){

      reflect->lo_width = _xmp_lwidth[i];
      reflect->hi_width = _xmp_uwidth[i];
      reflect->is_periodic = _xmp_is_periodic[i];

      _XMP_reflect_rdma_sched_dim(a, i, _xmp_lwidth[i], _xmp_uwidth[i], _xmp_is_periodic[i]);
    }

  }
  _XMP_TEND(xmptiming_.t_sched, t0);

  _XMP_reflect_start(a, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic, 0);

  _XMP_reflect_wait(a, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);

  /* Clear the one-shot request state so the next reflect starts fresh. */
  _xmp_set_reflect_flag = 0;
  for (int i = 0; i < a->dim; i++){
    _xmp_lwidth[i] = 0;
    _xmp_uwidth[i] = 0;
    _xmp_is_periodic[i] = 0;
  }
}
Esempio n. 6
0
/*
 * Reflect (update the shadow of) array `a` using MPI requests.
 *
 * If the array is not allocated, _xmp_set_reflect_flag is cleared and the
 * function returns.  If no widths were set beforehand, the declared shadow
 * sizes are used with non-periodic mode.
 *
 * For every NORMAL-shadow dimension with a nonzero requested width:
 *   - the schedule is rebuilt (packed-copy or normal variant depending on
 *     _xmp_reflect_pack_flag) when it is not initialized or its cached
 *     widths/periodicity differ from the request;
 *   - when packing is enabled and req[0] is not null, the data is packed;
 *   - the four requests are started (unless req[0] is null);
 *   - because is_ordinal is 1, the requests are waited on immediately and,
 *     when packing is enabled, the data is unpacked before moving on to the
 *     next dimension.
 * FULL-shadow dimensions are delegated to _XMP_reflect_shadow_FULL.
 *
 * Finally _xmp_set_reflect_flag and the global width/periodic arrays are
 * reset for the dimensions of `a`.
 *
 * a : array whose shadow is reflected.
 *
 * NOTE(review): t0 is reused by the inner timing regions, so the outer
 * t_sched measurement may not cover the whole loop -- confirm against the
 * definition of _XMP_TSTART/_XMP_TEND.
 */
void _XMP_reflect__(_XMP_array_t *a)
{
  /* Nonzero: complete each dimension before starting the next one. */
  int is_ordinal = 1;

  //_XMP_RETURN_IF_SINGLE;
  if (!a->is_allocated){
    _xmp_set_reflect_flag = 0;
    return;
  }

  /* Default request: full declared shadow width, non-periodic. */
  if (!_xmp_set_reflect_flag){
    for (int d = 0; d < a->dim; d++){
      _xmp_lwidth[d] = a->info[d].shadow_size_lo;
      _xmp_uwidth[d] = a->info[d].shadow_size_hi;
      _xmp_is_periodic[d] = 0;
    }
  }

  _XMP_TSTART(t0);
  for (int d = 0; d < a->dim; d++){

    _XMP_array_info_t *info = &(a->info[d]);

    if (info->shadow_type == _XMP_N_SHADOW_NONE){
      continue;
    }

    if (info->shadow_type != _XMP_N_SHADOW_NORMAL){
      /* _XMP_N_SHADOW_FULL */
      _XMP_reflect_shadow_FULL(a->array_addr_p, a, d);
      continue;
    }

    _XMP_reflect_sched_t *sched = info->reflect_sched;

    /* Nothing requested for this dimension. */
    if (!_xmp_lwidth[d] && !_xmp_uwidth[d]){
      continue;
    }

    _XMP_ASSERT(sched);

    int stale = (!sched->reflect_is_initialized ||
                 _xmp_lwidth[d] != sched->lo_width ||
                 _xmp_uwidth[d] != sched->hi_width ||
                 _xmp_is_periodic[d] != sched->is_periodic);
    if (stale){

      if (_xmp_reflect_pack_flag){
        _XMP_reflect_pcopy_sched_dim(a, d, _xmp_lwidth[d], _xmp_uwidth[d], _xmp_is_periodic[d], _XMP_COMM_REFLECT);
      }
      else {
        _XMP_reflect_normal_sched_dim(a, d, _xmp_lwidth[d], _xmp_uwidth[d], _xmp_is_periodic[d]);
      }

      /* Cache the parameters the schedule was built for. */
      sched->reflect_is_initialized = 1;
      sched->lo_width = _xmp_lwidth[d];
      sched->hi_width = _xmp_uwidth[d];
      sched->is_periodic = _xmp_is_periodic[d];
    }

    if (_xmp_reflect_pack_flag && sched->req[0] != MPI_REQUEST_NULL){
      _XMP_TSTART(t0);
      _XMP_reflect_pack_dim(a, d, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic, _XMP_COMM_REFLECT);
      _XMP_TEND(xmptiming_.t_copy, t0);
    }

    _XMP_TSTART(t0);
    /* If req[0] isn't null, the other three shouldn't be null either. */
    if (sched->req[0] != MPI_REQUEST_NULL){
      MPI_Startall(4, sched->req);
    }
    _XMP_TEND2(xmptiming_.t_comm, xmptiming_.tdim_comm[d], t0);

    if (is_ordinal){
      _XMP_TSTART(t0);
      MPI_Waitall(4, sched->req, MPI_STATUSES_IGNORE);
      _XMP_TEND2(xmptiming_.t_wait, xmptiming_.tdim_wait[d], t0);

      if (_xmp_reflect_pack_flag && sched->req[0] != MPI_REQUEST_NULL){
        _XMP_TSTART(t0);
        _XMP_reflect_unpack_dim(a, d, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);
        _XMP_TEND(xmptiming_.t_copy, t0);
      }
    }

  }
  _XMP_TEND(xmptiming_.t_sched, t0);

  /* Deferred completion for all dimensions at once (non-ordinal mode). */
  if (!is_ordinal){
    _XMP_reflect_wait(a, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);
  }

  /* Clear the one-shot request state so the next reflect starts fresh. */
  _xmp_set_reflect_flag = 0;
  for (int d = 0; d < a->dim; d++){
    _xmp_lwidth[d] = 0;
    _xmp_uwidth[d] = 0;
    _xmp_is_periodic[d] = 0;
  }
}
Esempio n. 7
0
/*
 * Start an asynchronous reflect of array `a` covering all shadow directions
 * at once.
 *
 * The per-array async-reflect descriptor (a->async_reflect) is created on
 * first use.  Its schedule is reused when the cached lower/upper widths and
 * periodic flags of every dimension match the current request
 * (_xmp_lwidth / _xmp_uwidth / _xmp_is_periodic); otherwise the cached
 * datatypes and requests are freed and the schedule is rebuilt by calling
 * _XMP_reflect_sched_dir for every nonzero direction vector.
 *
 * The descriptor's requests are then copied to the tail of the current async
 * communication's request list and started with MPI_Startall.
 *
 * a        : array whose shadow is reflected.
 * async_id : not referenced in this function.
 *
 * Calls _XMP_fatal when the async request list would exceed
 * _XMP_MAX_ASYNC_REQS.
 */
void _XMP_reflect_async_ordinal(_XMP_array_t *a, int async_id){

  int n = a->dim;
  _XMP_async_reflect_t *async_reflect;

  _Bool reusable_sched = false;

  if (!a->async_reflect){
    /* There are 3^n - 1 nonzero direction vectors; the factor of 2 is
       presumably one send and one receive per direction.  The power is
       computed with integer arithmetic so the buffer size cannot be
       truncated by floating-point rounding. */
    int ndirs = 1;
    for (int i = 0; i < n; i++){
      ndirs *= 3;
    }
    int max_nreqs = (ndirs - 1) * 2;

    async_reflect = (_XMP_async_reflect_t *)_XMP_alloc(sizeof(_XMP_async_reflect_t));
    async_reflect->datatype = (MPI_Datatype *)_XMP_alloc(sizeof(MPI_Datatype) * max_nreqs);
    async_reflect->reqs = (MPI_Request *)_XMP_alloc(sizeof(MPI_Request) * max_nreqs);
    for (int i = 0; i < max_nreqs; i++){
      async_reflect->datatype[i] = MPI_DATATYPE_NULL;
      async_reflect->reqs[i] = MPI_REQUEST_NULL;
    }
    async_reflect->nreqs = 0;
    a->async_reflect = async_reflect;
  }
  else {
    /* Reuse the cached schedule only if every dimension's parameters match. */
    reusable_sched = true;
    async_reflect = a->async_reflect;
    for (int i = 0; i < n; i++){
      if (async_reflect->lwidth[i] != _xmp_lwidth[i] ||
          async_reflect->uwidth[i] != _xmp_uwidth[i] ||
          async_reflect->is_periodic[i] != _xmp_is_periodic[i]){
        reusable_sched = false;
        break;
      }
    }
  }

  if (!reusable_sched){

    /* Per-dimension direction range: -1 (lower) .. +1 (upper).  Dimensions
       at or beyond n keep lb == ub == 0 and contribute a single iteration. */
    int lb[_XMP_N_MAX_DIM] = { 0 };
    int ub[_XMP_N_MAX_DIM] = { 0 };

    for (int i = 0; i < n; i++){
      async_reflect->lwidth[i] = _xmp_lwidth[i];
      async_reflect->uwidth[i] = _xmp_uwidth[i];
      async_reflect->is_periodic[i] = _xmp_is_periodic[i];

      if (_xmp_lwidth[i] > 0) lb[i] = -1;
      if (_xmp_uwidth[i] > 0) ub[i] = 1;
    }

    /* Release the datatypes and requests of the previous schedule. */
    for (int i = 0; i < async_reflect->nreqs; i++){
      if (async_reflect->datatype[i] != MPI_DATATYPE_NULL){
        MPI_Type_free(&async_reflect->datatype[i]);
      }
      if (async_reflect->reqs[i] != MPI_REQUEST_NULL){
        MPI_Request_free(&async_reflect->reqs[i]);
      }
    }
    async_reflect->nreqs = 0;

    int ishadow[_XMP_N_MAX_DIM];
    for (ishadow[0] = lb[0]; ishadow[0] <= ub[0]; ishadow[0]++){
    for (ishadow[1] = lb[1]; ishadow[1] <= ub[1]; ishadow[1]++){
    for (ishadow[2] = lb[2]; ishadow[2] <= ub[2]; ishadow[2]++){
    for (ishadow[3] = lb[3]; ishadow[3] <= ub[3]; ishadow[3]++){
    for (ishadow[4] = lb[4]; ishadow[4] <= ub[4]; ishadow[4]++){
    for (ishadow[5] = lb[5]; ishadow[5] <= ub[5]; ishadow[5]++){
    for (ishadow[6] = lb[6]; ishadow[6] <= ub[6]; ishadow[6]++){

      // When ishadow > 0, upper shadow is to be updated, and vice versa.

      /* Skip the all-zero vector: it is not a shadow direction. */
      int nnzero = 0;
      for (int i = 0; i < n; i++){
        if (ishadow[i] != 0) nnzero++;
      }
      if (nnzero == 0) continue;

      _XMP_reflect_sched_dir(a, ishadow, _xmp_lwidth, _xmp_uwidth, _xmp_is_periodic);

    }}}}}}}

  }

  _XMP_async_comm_t *async = _XMP_get_current_async();
  MPI_Request *reqs = &async->reqs[async->nreqs];

  // copy to async
  if (async->nreqs + async_reflect->nreqs > _XMP_MAX_ASYNC_REQS){
    _XMP_fatal("too many arrays in an asynchronous reflect");
  }
  memcpy(reqs, async_reflect->reqs, async_reflect->nreqs * sizeof(MPI_Request));

  async->nreqs += async_reflect->nreqs;

  _XMP_TSTART(t0);
  MPI_Startall(async_reflect->nreqs, reqs);
  _XMP_TEND(xmptiming_.t_start, t0);

}
Esempio n. 8
0
/*
 * Prepare the per-dimension reflect schedules stored in reflect_acc_sched
 * for array `a`.
 *
 * For every dimension with a NORMAL shadow:
 *   - a schedule object is allocated and reset on first use (is_periodic set
 *     to -1 as the "not set yet" marker, datatypes and requests set to their
 *     null handles, all buffer pointers set to NULL);
 *   - the packed-copy schedule is rebuilt through
 *     _XMP_reflect_pcopy_sched_dim when the schedule was never set or its
 *     cached widths/periodicity differ from the requested ones.
 * Dimensions with NONE or FULL shadows are skipped.
 *
 * a           : array to schedule.
 * lwidth      : per-dimension lower widths.
 * uwidth      : per-dimension upper widths.
 * is_periodic : per-dimension periodic flags.
 * is_async    : not referenced in this function.
 * dev_addr    : forwarded unchanged to _XMP_reflect_pcopy_sched_dim.
 */
static void _XMP_reflect_sched(_XMP_array_t *a, int *lwidth, int *uwidth,
                               int *is_periodic, int is_async, void *dev_addr)
{
  _XMP_TSTART(t0);
  for (int d = 0; d < a->dim; d++){

    _XMP_array_info_t *info = &(a->info[d]);

    /* Only NORMAL shadows are scheduled; NONE and FULL are skipped. */
    if (info->shadow_type != _XMP_N_SHADOW_NORMAL){
      continue;
    }

    _XMP_reflect_sched_t *sched = info->reflect_acc_sched;

    if (!sched){
      /* First use: create the schedule in its "not set yet" state. */
      sched = _XMP_alloc(sizeof(_XMP_reflect_sched_t));
      sched->is_periodic = -1;
      sched->datatype_lo = MPI_DATATYPE_NULL;
      sched->datatype_hi = MPI_DATATYPE_NULL;
      for (int r = 0; r < 4; r++){
        sched->req[r] = MPI_REQUEST_NULL;
      }
      sched->lo_send_buf = NULL;
      sched->lo_recv_buf = NULL;
      sched->hi_send_buf = NULL;
      sched->hi_recv_buf = NULL;
      sched->lo_send_host_buf = NULL;
      sched->lo_recv_host_buf = NULL;
      sched->hi_send_host_buf = NULL;
      sched->hi_recv_host_buf = NULL;
      info->reflect_acc_sched = sched;
    }

    _XMP_ASSERT(sched);

    /* Up to date: already set and every cached parameter matches.  The
       is_periodic test comes first so lo_width/hi_width of a freshly
       allocated schedule are never read. */
    if (sched->is_periodic != -1 &&
        lwidth[d] == sched->lo_width &&
        uwidth[d] == sched->hi_width &&
        is_periodic[d] == sched->is_periodic){
      continue;
    }

    sched->lo_width = lwidth[d];
    sched->hi_width = uwidth[d];
    sched->is_periodic = is_periodic[d];

    _XMP_reflect_pcopy_sched_dim(a, d, lwidth[d], uwidth[d], is_periodic[d], dev_addr, lwidth, uwidth);

  }
  _XMP_TEND(xmptiming_.t_sched, t0);
}