示例#1
0
/*
 * Issue the RDMA puts of a reflect operation for every dimension of `a`.
 *
 * After a barrier, each dimension's schedule (a->info[d].reflect_sched) is
 * processed in order:
 *   - "lower" transfer: when lwidth[d] is non-zero and hi_rank is not -1,
 *     `count` blocks are put to hi_rank, from lo_send_array into
 *     lo_recv_array, each block lwidth[d] * blocklength long and placed
 *     `stride` apart.
 *   - "upper" transfer: the mirror case, put to lo_rank from hi_send_array
 *     into hi_recv_array with uwidth[d].
 *
 * a           array descriptor whose per-dimension schedules are used
 * lwidth      per-dimension lower width; 0 disables the lower transfer
 * uwidth      per-dimension upper width; 0 disables the upper transfer
 * is_periodic not referenced in this function
 * tag         tag handed to every FJMPI_Rdma_put call
 *
 * Only the puts are issued here; nothing in this function waits for them to
 * complete.
 */
static void _XMP_reflect_start(_XMP_array_t *a, int *lwidth, int *uwidth, int *is_periodic,
                               int tag)
{
  // The two directions use different local/remote NIC pairs.
  const int lower_nics = FJMPI_RDMA_LOCAL_NIC0 | FJMPI_RDMA_REMOTE_NIC2;
  const int upper_nics = FJMPI_RDMA_LOCAL_NIC1 | FJMPI_RDMA_REMOTE_NIC3;

  _XMP_TSTART(t1);

  xmp_barrier();

  for (int d = 0; d < a->dim; d++){

    _XMP_reflect_sched_t *reflect = a->info[d].reflect_sched;

    _XMP_TSTART(t0);

    // for lower reflect: put to the hi_rank neighbour, if there is one

    if (lwidth[d] && reflect->hi_rank != -1){
      const uint64_t remote_base = (uint64_t)reflect->lo_recv_array;
      const uint64_t local_base = (uint64_t)reflect->lo_send_array;

      for (int blk = 0; blk < reflect->count; blk++){
        FJMPI_Rdma_put(reflect->hi_rank, tag,
                       remote_base + blk * reflect->stride,
                       local_base + blk * reflect->stride,
                       lwidth[d] * reflect->blocklength,
                       lower_nics);
      }
    }

    // for upper reflect: put to the lo_rank neighbour, if there is one

    if (uwidth[d] && reflect->lo_rank != -1){
      const uint64_t remote_base = (uint64_t)reflect->hi_recv_array;
      const uint64_t local_base = (uint64_t)reflect->hi_send_array;

      for (int blk = 0; blk < reflect->count; blk++){
        FJMPI_Rdma_put(reflect->lo_rank, tag,
                       remote_base + blk * reflect->stride,
                       local_base + blk * reflect->stride,
                       uwidth[d] * reflect->blocklength,
                       upper_nics);
      }
    }

    _XMP_TEND(xmptiming_.tdim_comm[d], t0);

  }

  _XMP_TEND(xmptiming_.t_comm, t1);

}
/*
 * Write one element to a coarray on `target_rank` with an RDMA put and wait
 * for that put to complete before returning.
 *
 * target_rank  rank whose entry in dst_desc->addr is used as the remote base
 * dst_desc     destination coarray descriptor
 * dst_offset   destination offset, in elements of elmt_size bytes
 * value        data to send when src_desc is NULL
 * src_desc     source coarray descriptor, or NULL to send `value` itself
 * src_offset   source offset, in elements (only used when src_desc != NULL)
 * elmt_size    number of bytes transferred
 *
 * When src_desc is NULL, `value` is registered under _XMP_TEMP_MEMID for the
 * duration of the put and deregistered afterwards.
 * NOTE(review): that registration covers elmt_size bytes starting at &value,
 * which is an int -- presumably callers never pass elmt_size > sizeof(int);
 * confirm.
 */
void _XMP_fjrdma_atomic_define(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int value,
                               _XMP_coarray_t *src_desc, size_t src_offset, size_t elmt_size)
{
  const int send_by_value = (src_desc == NULL);
  const uint64_t remote_addr = (uint64_t)dst_desc->addr[target_rank] + elmt_size * dst_offset;
  uint64_t local_addr;

  if(send_by_value){
    // No source coarray: register the by-value argument as a temporary region.
    local_addr = FJMPI_Rdma_reg_mem(_XMP_TEMP_MEMID, &value, elmt_size);
  }
  else{
    local_addr = src_desc->laddr + elmt_size * src_offset;
  }

  FJMPI_Rdma_put(target_rank, _XMP_FJRDMA_TAG, remote_addr, local_addr, elmt_size, _XMP_COARRAY_FLAG_NIC);
  _XMP_add_num_of_puts();
  _XMP_fjrdma_sync_memory_put(); // ensure to complete the above put operation.

  if(send_by_value){
    // Release the temporary region only after the put has completed.
    FJMPI_Rdma_dereg_mem(_XMP_TEMP_MEMID);
  }
}